LIBNAME dev2 "G:\data\teachman\menatti\dev2\stigma3"; 
FILENAME raw "G:\data\teachman\menatti\raw\stigma3"; 
  *fileref for the raw downloaded datafiles, read below with INFILE Raw(...);
LIBNAME Clean "G:\data\teachman\menatti\clean\stigma3"; 
  *for your final, "cleaned" data;
*This is where all of the HTML pages of the output are kept (used as ODS HTML PATH=WebOut);
FILENAME Webout "C:\Documents and Settings\Fred Smyth\My Documents\My Dropbox\Teachman NIA RO1\menatti\output\stigma3"; 

/*
/ Program   : Documented Cleaning Script (with code for importing RDE .txt datafiles)
/ Version   : 0.2
/ Editor(s) : Nicole M Lindner
/ Date      : April 15, 2009
/ Contact   : nml5d@email.virginia.edu
/======================================================================================================
/ Purpose   : Read in, clean, and transpose all standard datafiles resulting from 
/		data collected at Project Implicit and downloaded through the 
/		RDE (https://rde.projectimplicit.net)
/ SubMacros : %IatCalc and  %IatAlgorithm, available in sasMacro script 
/		(see http://projectimplicit.net/nosek/papers/scoringalgorithm.sas.txt),
/		Either save each macro (file name same as the macro) in this directory:
/		C:\Program Files\SAS\SAS 9.1\core\sasmacro
/		Or use this line, changing the file location to wherever you have a file with the
/		IAT macros saved: %include 'C:\primary\dataweb\common\SPFD.sas';
 /
/ Notes     : This script:
/		* Reads in the typical datafiles for web studies, either for those from the Demo- and
/		   International tasks (e.g., Demo.Age, Australia.Weight) or using the Research-site
/		   participant pool or website infrastructure (e.g., NosekLab.othtran.dmp.budd.0001)
/		 * Cleans the datafiles, transposes them, optionally obtains some useful 
/		   information (time spent on a task, sequence of implicit/explicit/other 
/		   tasks within the study, etc.) and merges them.
/		* For the IAT: cleans, reverse-codes, checks for problem participants/trials,
/		   etc. in preparation for substantive analysis
/		* Merges all cleaned and transposed datafiles into a final cleaned dataset.
/
/======================================================================================================
/ PARAMETERS:
/-------name-----------	-------------------------description-------------------------------------------
/PREPARATION SECTIONS:  These clean repeat observations and transposes the data	to create one line per
			 participant, in preparation for merging with other files):
 / ST-SessionTask 
 / S-Session		For Demo-site (& international) tasks w/o separate demographics file
 / S-ALTERNATE Session	for Research-site data, also imports Demographics 
 / D-Demographics	For merging with S-ALTERNATE
 / E-Explicit
 / I-IAT
/INFO ON TASK SECTIONS:
 / ST-Step3-TimeSpent
 / ST-Step A-Consenters
 / ST-Step B-Task Order
 / ST-Step C-Time Spent
 / ST-Step D-experimental condition assignment
/
/MD-Merging Data		Cleaning Raw, Merged & Transposed Section 
/======================================================================================================
/ AMENDMENT HISTORY:
/ init --date--- mod-id ----------------------description----------------------------------------------
/ 0.1.1 Dec10,2008 Import RDE data:
/			Adds Carlee's script to import .txt datafiles downloaded from the 
/			 Project Implicit Virtual Lab's DATA Download feature (i.e., tab-separated 
/			".txt" files) from https://rde.projectimplicit.net
/ 0.2   Apr15,2009 Corrects import of datetime variables so SAS recognizes that they're datetime
/
/======================================================================================================
/ This is public domain software. No guarantee as to suitability or accuracy is
/ given or implied. User uses this code entirely at their own risk.
/====================================================================================================*/



/***********           ************          ************          ************           *************
ST-SessionTask Preparation Section 
The SessionTask file is the map to all study files that participants saw:
   For every participant that was assigned to the study, there will be 1 line for each Task/page, 
	with columns recording the following:
	a) each Task[page] that the participant viewed, 
	b) the Order in which they saw the page, and
	c) the time spent on that page.
   Because data is recorded when the page is loaded, *regardless* of whether they make any responses 
	or continue in the study, this should be used to determine their experimental condition,
	whether they provided consent, etc.

   This is especially important in research site studies, or others which contain 
	experimental manipulations
	
   Note that to reduce the number of transformations, I'm dropping some variables (e.g., user_id and
	Session_Status, some datetime variables) and instead getting them from the Sessions datafile
	(one line per participant).
************           ************          ************          ************           ************/

/***	Step 0 - Bringing in the file							 	   ***/
/* Read the tab-delimited SessionTask export (Raw fileref, member sessionTasks.txt) into WORK.Temp.
   Optional, currently disabled: to drop variables here, add this data set option to the
   DATA statement:
     (DROP=User_Agent User_ID Session_Created_BY Session_Last_Update_Date
      Session_Creation_Date Task_Creation_Date Session_Status)
   Note that Task_Creation_Date is read again by the time-on-task step further down.      */
DATA Temp;
  /* Declaring the four datetime variables first places them at the front of Temp and stores
     DATETIME20. as their informat/format attributes; the values themselves are parsed by the
     ANYDTDTM21. informats given on the INPUT statement. */
  INFORMAT Task_Creation_Date Session_Date Session_Creation_Date
           Session_Last_Update_Date DATETIME20.;
  FORMAT   Task_Creation_Date Session_Date Session_Creation_Date
           Session_Last_Update_Date DATETIME20.;

  /* DELIMITER='09'x : fields are separated by tabs.
     DSD             : two consecutive tabs mean an empty field (e.g. a blank Task_ID). Without
                       it, list input collapses them into one delimiter and every later column
                       is read from the wrong field. DSD also strips quotation marks that
                       enclose a whole field value.
     TRUNCOVER       : a line with fewer fields than expected leaves the remaining variables
                       missing instead of continuing onto the next record.
     FIRSTOBS=2      : start at the second line (presumably skipping a header row -- confirm). */
  INFILE Raw(sessionTasks.txt) DELIMITER='09'x DSD TRUNCOVER LRECL=2000 FIRSTOBS=2;

  /* Task_Status and Task_Sequence use the default character length of 8 */
  INPUT Session_ID
        Task_Number
        Task_ID                  :$20.
        Task_URL                 :$128.
        User_Agent               :$16.
        Study_URL                :$48.
        Task_Status              $
        Task_Sequence            $
        Task_Creation_Date       :ANYDTDTM21.
        User_ID
        Study_Name               :$64.
        Session_Date             :ANYDTDTM21.
        Session_Status           :$4.
        Session_Creation_Date    :ANYDTDTM21.
        Session_Created_By       :$24.
        Session_Last_Update_Date :ANYDTDTM21.;
RUN;

/* Check variable types, lengths and formats of the imported data */
PROC CONTENTS DATA=Temp; RUN;
/*
                     Data Set Name        WORK.TEMP                              Observations          6052
                     Member Type          DATA                                   Variables             16
                     Engine               V9                                     Indexes               0
                     Created              Wednesday, May 05, 2010 11:48:36 AM    Observation Length    376
                     Last Modified        Wednesday, May 05, 2010 11:48:36 AM    Deleted Observations  0
                     Protection                                                  Compressed            NO
                     Data Set Type                                               Sorted                NO
                     Label
                     Data Representation  WINDOWS_32
                     Encoding             wlatin1  Western (Windows)


                                               Engine/Host Dependent Information

            Data Set Page Size          16384
            Number of Data Set Pages    141
            First Data Page             1
            Max Obs per Page            43
            Obs in First Data Page      36
            Number of Data Set Repairs  0
            File Name                   C:\DOCUME~1\FREDSM~1\LOCALS~1\Temp\SAS Temporary Files\_TD1936\temp.sas7bdat
            Release Created             9.0101M3
            Host Created                XP_PRO


                                          Alphabetic List of Variables and Attributes

                           #    Variable                    Type    Len    Format         Informat

                          16    Session_Created_By          Char     24
                           3    Session_Creation_Date       Num       8    DATETIME20.    DATETIME20.
                           2    Session_Date                Num       8    DATETIME20.    DATETIME20.
                           5    Session_ID                  Num       8
                           4    Session_Last_Update_Date    Num       8    DATETIME20.    DATETIME20.
                          15    Session_Status              Char      4
                          14    Study_Name                  Char     64
                          10    Study_URL                   Char     48
                           1    Task_Creation_Date          Num       8    DATETIME20.    DATETIME20.
                           7    Task_ID                     Char     20
                           6    Task_Number                 Num       8
                          12    Task_Sequence               Char      8
                          11    Task_Status                 Char      8
                           8    Task_URL                    Char    128
                           9    User_Agent                  Char     16
                          13    User_ID                     Num       8

*/
/* Route the raw-import diagnostics to an HTML page in the WebOut directory.
   The destination stays open until the ODS HTML CLOSE that follows the Step 1 cleaning. */
ODS HTML PATH=WebOut FILE="Prep.SessTask.01.Raw.TaskURL.htm";

PROC CONTENTS DATA=Temp;
RUN;

/* Frequency of each distinct session datetime value */
PROC FREQ DATA=Temp;
  TABLES Session_Date;
  FORMAT Session_Date DATETIME20.;
RUN;

/***	Step 1 - Cleaning Repeat data;
	get rid of duplicate submissions to the dataset of the same data; 			   ***/
/* Sort so that rows with the same Session_ID and Task_URL are adjacent */
PROC SORT DATA=Temp;
  BY Session_ID Task_URL;
RUN;
PROC FREQ DATA=Temp;
  TABLES Task_URL;
RUN;

/* Flag every row after the first within each Session_ID x Task_URL group as a Repeat.
   FIRST.Task_URL is maintained by the BY statement on every observation (it is 1 whenever
   Session_ID or Task_URL changes), so the flag is reliable for every row.
   Do not use LAG() inside a compound IF condition for this: each LAG call keeps its own queue,
   which only advances when that call is actually executed. If LAG(Task_URL) is skipped on rows
   where the Session_ID test already failed, it later returns a stale value and the first
   duplicate of a session can be missed. */
DATA Temp;
  SET Temp;
  BY Session_ID Task_URL;
  Repeat = (NOT FIRST.Task_URL);   /* 1 = same session and URL as the previous row, else 0 */
RUN;

TITLE2 'Repeat SessionTask Observations';
PROC FREQ DATA=Temp;
  TABLES Repeat;
RUN;

/*** Drop duplicate lines of data								   ***/
DATA Temp;
  SET Temp;
  IF Repeat = 1 THEN DELETE;
  DROP Repeat;
RUN;

/* List the Task_URL values remaining in Temp, then close the HTML page opened above */
TITLE2 'Task URL, help for Cleaning';
/*** If Task_ID is not blank can substitute / add it in the TABLES statement 			   ***/
PROC FREQ DATA=Temp;
  TABLES Task_URL;
RUN;

ODS HTML CLOSE;

/***NOTE: 
	All following steps for this dataset are entirely optional, and will vary based on your 
	study design and the data you need 							   ***/

/***	Step 2 - Cleaning Task_URL
	       - Step A, etc. = pulling different information from the SessionTask file
	       - Note that for most new studies, you can use the actual Task_ID variable, which 
		  will not have the extraneous HTML code for the location			  ***/

/* Task_URL                                                                Freq   Percent   Cum Freq
//user/andy/stigma2/commonFiles/biattask9.jsp?i=/implicit//user/andy/stigma2/iats/iat_a1-1x.xml&r=/implicit//user/andy/stigma2/d
//user/andy/stigma2/commonFiles/biattask9.jsp?i=/implicit//user/andy/stigma2/iats/iat_b2-4.xml&r=/implicit//user/andy/stigma2/da
//user/andy/stigma2/tasks/17407/index.jsp
//user/andy/stigma2/tasks/17408/index.jsp
//user/andy/stigma2/tasks/17409/index.jsp
//user/andy/stigma2/tasks/17410/index.jsp
//user/andy/stigma2/tasks/17411/index.jsp
//user/andy/stigma2/tasks/17412/index.jsp
//user/andy/stigma2/tasks/17413/index.jsp
//user/andy/stigma2/tasks/17414/index.jsp
//user/andy/stigma2/tasks/17415/index.jsp
//user/andy/stigma2/tasks/17416/index.jsp
//user/andy/stigma2/tasks/17417/index.jsp
//user/andy/stigma2/tasks/17418/index.jsp
//user/andy/stigma2/tasks/17419/index.jsp
//user/andy/stigma2/tasks/17420/index.jsp
//user/andy/stigma2/tasks/17421/index.jsp
//user/andy/stigma2/tasks/17422/index.jsp
//user/andy/stigma2/tasks/miss_1st-1.xml
//user/andy/stigma2/tasks/miss_1st-2.xml
//user/andy/stigma2/tasks/miss_1st-3.xml
//user/andy/stigma2/tasks/miss_1st-4.xml
//user/andy/stigma2/tasks/miss_2nd-1.xml
//user/andy/stigma2/tasks/miss_2nd-2.xml
//user/andy/stigma2/tasks/miss_2nd-3.xml
//user/andy/stigma2/tasks/miss_2nd-4.xml
/common/en-us/html/Instructions.jsp

NOTE: TRANWRD(varname,"searchstring","transstring") translates (for varname) 
	a given searchstring to the given translationstring (or blank) 				  ***/
/*
DATA Temp1; SET Temp;
Task = TRANWRD(Task_URL,"/user/andy/stigma2/commonFiles/biattask9.jsp?i=/implicit//user/andy/stigma2/iats/iat_a1-1x.xml&r=/implicit//user/andy/stigma2/d","");
Task = TRANWRD(Task_URL,"/user/andy/stigma2/commonFiles/biattask9.jsp?i=/implicit//user/andy/stigma2/iats/iat_b2-4.xml&r=/implicit//user/andy/stigma2/da","");

Task = TRANWRD(Task,TRIM("user/andy/stigma2/tasks/"),"");
Task = TRANWRD(Task,TRIM("index"),"");

Task = TRANWRD(Task,TRIM(".xml&r="),"");
Task = TRANWRD(Task,TRIM("IATTask     "),"");
Task = TRANWRD(Task,TRIM("     "),"");
Task = TRANWRD(Task,TRIM("common"),"");
Task = TRANWRD(Task,TRIM("en-us  "),"");
Task = TRANWRD(Task,TRIM("  "),"");

Task = TRANWRD(Task,TRIM("/implicit"),"");
Task = TRANWRD(Task,TRIM("iats"),"");

Task = TRANWRD(Task,TRIM("/user"),"");
Task = TRANWRD(Task,TRIM("/flash/"),"");
Task = TRANWRD(Task,TRIM("html/"),"");

Task = TRANWRD(LEFT(Task),".","");
Task = TRANWRD(LEFT(Task),"/","");
Task = TRANWRD(LEFT(Task),"xml","");
Task = TRANWRD(LEFT(Task),"jsp","");
Task = TRANWRD(LEFT(Task),"r=","");
Task = TRANWRD(LEFT(Task),"?i=","");
Task = TRANWRD(LEFT(Task),"&","");
Task = TRANWRD(Task, "html","");
Task = TRANWRD(Task, "htm","");
Task = TRIM(LEFT(Task));
DROP Task_URL; RUN;
PROC FREQ DATA=Temp1;TABLES Task;RUN;

*/
/***	Sorting data in proper Order: the PROC TRANSPOSE steps below use BY Session_ID	  ***/
PROC SORT DATA=Temp;
  BY Session_ID;
RUN;

/***********           ************          ************          ************           *************
ST-Step A - Dropping data for non-consenters;
NOTE:	Primarily for Research site data (where you will have demographics data for all who were
	   assigned to the study, even IF they didn't provide consent (I think?). This makes an 
	   important assumption--that the consent form is Task #0, and that if they go past that, that
	   they have provided consent 
     	In newer data, consent will be task_number #1, in which case the WHERE statement and RENAME
	   below should be WHERE=(Task_Number=2) & the DATA statement below reference the _1 variable
************           ************          ************          ************           ************/
/* One row per session: the Task_ID recorded at Task_Number=1 becomes column _1, which is
   renamed to ConsentGiven. Temp must already be sorted by Session_ID.
   NOTE(review): the time-on-task notes later in this script list numconsent=1, which suggests
   the consent page itself is Task_Number 1 in this study. If so, this step flags everyone who
   reached the consent page, and the header note above says WHERE=(Task_Number=2) should be
   used in that situation -- confirm against the study design before relying on Consenter. */
PROC TRANSPOSE DATA=Temp(WHERE=(Task_Number=1))
               OUT=GaveConsent(DROP=NAME _LABEL_ RENAME=(_1=ConsentGiven))
               NAME=NAME;
  BY Session_ID;
  ID Task_Number;
  VAR Task_ID;
RUN;

PROC FREQ DATA=GaveConsent;
  TABLES ConsentGiven;
RUN;
PROC CONTENTS DATA=GaveConsent;
RUN;

/***	Depending on number of conditions, this might have many different Tasks, 
	but I just want to mark whether they were assigned to a page after consent 		  ***/
/* Consenter = 1 when ConsentGiven is non-blank, otherwise missing; ConsentGiven is not kept */
DATA Clean.GaveConsent(DROP=consentGiven);
  SET GaveConsent;
  FORMAT Consenter 8.;
  IF consentGiven = '' THEN Consenter = .;
  ELSE Consenter = 1;
RUN;

PROC FREQ DATA=Clean.GaveConsent;
  TABLES Consenter / MISSING;
RUN;

/***	You would then merge this with the cleaned Demographics datafile, and only
	keep observations where consenter = 1, so that you don't have blank sessions for 
	all participants who were assigned to the study, but didn't go beyond the Consent page;   ***/

/***********           ************          ************          ************           *************
ST-Step B - Determining Task ("pages" in the study_ID) Order ;
	You can do something similar IF Task_ID is not empty 
	To correspond to PCIAS analyses, this also saves the IAT & Explicit task_numbers 
	to ITaskNum & ETaskNum, and saves the IAT variation (a,b,c,d) to TaskTemp
************           ************          ************          ************           ************/
/* Re-sort Temp by Task_ID so the BY-group frequency table below can be produced.
   Temp is no longer in Session_ID order after this step. */
PROC SORT DATA=Temp;
  BY Task_ID;
RUN;

/* Page 03: which Task_Number values occur for each Task_ID */
ODS HTML PATH=WebOut FILE="Prep.SessTask.03.tasknumbersBYtask.htm";
PROC FREQ DATA=Temp;
  BY Task_ID;
  TABLES Task_Number;
RUN;
ODS HTML CLOSE;

/* Page 04: overall frequency of each Task_ID */
ODS HTML PATH=WebOut FILE="Prep.SessTask.04.task_ID.htm";
PROC FREQ DATA=Temp;
  TABLES Task_ID;
RUN;
ODS HTML CLOSE;
/*
                                                                            Cumulative    Cumulative
                           Task_ID                 Frequency     Percent     Frequency      Percent
                           �������������������������������������������������������������������������
                           cami_sr_1_bad                170        2.81           170         2.81
                           cami_sr_1_good               173        2.86           343         5.67
                           cami_sr_2_bad                110        1.82           453         7.49
                           cami_sr_2_good               113        1.87           566         9.35
                           consent                      720       11.90          1286        21.25
                           debriefing                   450        7.44          1736        28.68
                           iat_1_bad                    153        2.53          1889        31.21
                           iat_1_good                   138        2.28          2027        33.49
                           iat_2_bad                    145        2.40          2172        35.89
                           iat_2_good                   146        2.41          2318        38.30
                           iat_feedback_1_bad           128        2.12          2446        40.42
                           iat_feedback_1_good          122        2.02          2568        42.43
                           iat_feedback_2_bad           127        2.10          2695        44.53
                           iat_feedback_2_good          131        2.16          2826        46.70
                           iat_instructions_1_b         163        2.69          2989        49.39
                           iat_instructions_1_g         162        2.68          3151        52.07
                           iat_instructions_2_b         154        2.54          3305        54.61
                           iat_instructions_2_g         154        2.54          3459        57.15
                           imc                          687       11.35          4146        68.51
                           pop_up_close                 453        7.49          4599        75.99
                           post_consent                 696       11.50          5295        87.49
                           startpage                    757       12.51          6052       100.00
*/
/* Collapse the counterbalanced Task_ID variants into one simplified label per task type.
   Rows whose Task_ID is the literal string 'null' are removed; any Task_ID not listed below
   keeps a blank TaskSimple. */
DATA tempSessTask;
  SET Temp;
  FORMAT TaskSimple $20.;

  IF Task_ID = 'null' THEN DELETE;

  SELECT (Task_ID);
    WHEN ('startpage')
      TaskSimple = "startpage";
    WHEN ('consent')
      TaskSimple = "consent";
    WHEN ('post_consent')
      TaskSimple = "post_consent";
    WHEN ('iat_1_bad', 'iat_1_good', 'iat_2_bad', 'iat_2_good')
      TaskSimple = "IAT";
    WHEN ('iat_feedback_1_bad', 'iat_feedback_1_good', 'iat_feedback_2_bad', 'iat_feedback_2_good')
      TaskSimple = "IATfeedback";
    WHEN ('iat_instructions_1_b', 'iat_instructions_1_g', 'iat_instructions_2_b', 'iat_instructions_2_g')
      TaskSimple = "IATinstruct";
    WHEN ('imc')
      TaskSimple = "imc";
    WHEN ('cami_sr_1_bad', 'cami_sr_1_good', 'cami_sr_2_bad', 'cami_sr_2_good')
      TaskSimple = "cami";
    WHEN ('pop_up_close')
      TaskSimple = "pop_up_close";
    WHEN ('debriefing')
      TaskSimple = "debrief";
    OTHERWISE;   /* unlisted Task_ID: leave TaskSimple blank */
  END;
RUN;

PROC CONTENTS DATA=tempSessTask;
RUN;

/* Check the recode: every row should fall into one of the simplified labels */
PROC FREQ DATA=TempSessTask;
  TABLES TaskSimple;
RUN;

/***	Only run this line out once you are sure you've fully recoded Task into TaskSimple	  ***/
/* Overwrite TempSessTask, replacing TaskSimple with a variable named task that holds the
   simplified label. */
DATA TempSessTask;
  SET TempSessTask;
  task = tasksimple;
  DROP tasksimple;
RUN;
PROC CONTENTS DATA=TempSessTask;
RUN;

/* Order = the three columns needed for task order, sorted by session and task label */
PROC SORT DATA=TempSessTask(KEEP=Session_ID Task Task_Number) OUT=Order;
  BY Session_ID Task;
RUN;

/***
NOTE:	You would probably need to do some renaming (as is done here when I create the 
	TempSessTask data). Otherwise, there will be a separate Order variable for each of the 
	counterbalanced IATs, and all other file variations left in the Task variable, which would
	lead to some extraneous variables							  ***/
/* One row per session; each task label becomes a column ORD<task> holding its Task_Number */
PROC TRANSPOSE DATA=Order
               OUT=Clean.Order(DROP=NAME _LABEL_)
               NAME=NAME
               PREFIX=ORD;
  BY Session_ID;
  ID Task;
  VAR Task_Number;
RUN;

PROC CONTENTS DATA=Clean.Order;
RUN;

/***	Enter all variables (except Session_ID!) here 
	This gives a basic rendering of the order of important tasks. Useful if you need to calculate
	new variable (with IF-THEN clauses) indicating whether explicit came before/after IAT 	  ***/

/* Page 05: distribution of the Task_Number at which each simplified task was shown */
ODS HTML PATH=WebOut FILE="Prep.SessTask.05.order_tasksimple.htm";
PROC FREQ DATA=Clean.Order;
  TABLES ORDstartpage
         ORDconsent
         ORDpost_consent
         ORDimc
         ORDIATinstruct
         ORDIAT
         ORDIATfeedback
         ORDcami
         ORDpop_up_close
         ORDdebrief;
RUN;
ODS HTML CLOSE;

/* IATbeforeCAMI: 1 when the IAT instructions were task number 4, 0 when the CAMI was task
   number 4, missing otherwise. Only session_id and the flag are kept. */
DATA clean.iatcamiorder;
  SET Clean.Order;
  KEEP session_id IATbeforeCAMI;

  IF ORDIATinstruct = 4 THEN IATbeforeCAMI = 1;
  ELSE IF ORDcami = 4 THEN IATbeforeCAMI = 0;
  ELSE IATbeforeCAMI = .;
RUN;

PROC CONTENTS DATA=clean.iatcamiorder;
RUN;
PROC FREQ DATA=clean.iatcamiorder;
  TABLES IATbeforeCAMI;
RUN;

/* TAKE CARE OF THIS DURING IAT CLEANING
DATA iatgoodbadorder; SET Temp;
	IATgood1st = .;
      IF Task_ID IN ('iat_instructions_1_b','iat_instructions_2_b') THEN IATgood1st = 0;
	  ELSE IF Task_ID IN ('iat_instructions_1_g','iat_instructions_2_g') THEN IATgood1st = 1;
 KEEP session_id IATgood1st;
 PROC FREQ; TABLES IATgood1st; RUN;
PROC SORT DATA = iatgoodbadorder; BY session_id; RUN;
DATA iatgoodbadorder; SET iatgoodbadorder;
  IF LAG(session_id) NE session_id;  *delete duplicate IDs;
    PROC FREQ; TABLES session_id IATgood1st; RUN;
*/
/***********           ************          ************          ************           *************
ST-Step C	
	- Calculating time spent on the entire study (or modify to assess time on specific Tasks);
	- Identifies amount of time for each Task. This requires more recoding, but works better for
	   long-running Demo-site tasks, where the design often changes (so last Task_number changes)
	- IF you have a simple design and just want time spent on the entire study, just TRANSPOSE
 	  Task_number instead of Task. IF you do that, you can add the following 2 lines to the
	  TimeOnTasks datastep:
 	* FORMAT TimeIAT TimeIatinstr TimeSurvey mmss.; 
	* TimeSurvey = (Start2-Start1); TimeIatinstr = (Start3-Start2); TimeIAT = (Start4-Start3);
************           ************          ************          ************           ************/
PROC CONTENTS DATA=TempSessTask;
RUN;

TITLE2 'Time for Each Task';

/* TempTime = session, task label, task number and task start datetime, sorted by session */
PROC SORT DATA=TempSessTask(KEEP=Session_ID task Task_Number task_creation_Date) OUT=TempTime;
  BY Session_ID;
RUN;

/***	Identify Task start time								  ***/
/* One row per session; column Start<task> holds the datetime that task was created */
PROC TRANSPOSE DATA=TempTime
               OUT=TimeOnTasks(DROP=NAME _LABEL_)
               NAME=NAME
               PREFIX=Start;
  BY Session_ID;
  ID task;
  VAR task_creation_date;
RUN;

/* Same layout, but column Num<task> holds the Task_Number of that task */
PROC TRANSPOSE DATA=TempTime
               OUT=TimeTNum(DROP=NAME _LABEL_)
               NAME=NAME
               PREFIX=Num;
  BY Session_ID;
  ID task;
  VAR Task_Number;
RUN;

PROC CONTENTS DATA=TimeOnTasks;
RUN;
/*
                     Data Set Name        WORK.TIMEONTASKS                       Observations          757
                     Member Type          DATA                                   Variables             11
                     Engine               V9                                     Indexes               0
                     Created              Wednesday, May 05, 2010 12:54:35 PM    Observation Length    88
                     Last Modified        Wednesday, May 05, 2010 12:54:35 PM    Deleted Observations  0
                     Protection                                                  Compressed            NO
                     Data Set Type                                               Sorted                NO
                     Label
                     Data Representation  WINDOWS_32
                     Encoding             wlatin1  Western (Windows)


                                               Engine/Host Dependent Information

        Data Set Page Size          8192
        Number of Data Set Pages    9
        First Data Page             1
        Max Obs per Page            92
        Obs in First Data Page      67
        Number of Data Set Repairs  0
        File Name                   C:\DOCUME~1\FREDSM~1\LOCALS~1\Temp\SAS Temporary Files\_TD1936\timeontasks.sas7bdat
        Release Created             9.0101M3
        Host Created                XP_PRO


                                          Alphabetic List of Variables and Attributes

                                      #    Variable             Type    Len    Format

                                      1    Session_ID           Num       8
                                      9    StartIAT             Num       8    DATETIME20.
                                     10    StartIATfeedback     Num       8    DATETIME20.
                                      4    StartIATinstruct     Num       8    DATETIME20.
                                      2    Startcami            Num       8    DATETIME20.
                                      3    Startconsent         Num       8    DATETIME20.
                                      8    Startdebrief         Num       8    DATETIME20.
                                      5    Startimc             Num       8    DATETIME20.
                                     11    Startpop_up_close    Num       8    DATETIME20.
                                      6    Startpost_consent    Num       8    DATETIME20.
                                      7    Startstartpage       Num       8    DATETIME20.
*/

/* List the Num<task> columns produced by the second transpose */
PROC CONTENTS DATA=TimeTNum;
RUN;
/*
                                               #    Variable           Type    Len

                                               9    NumIAT             Num       8
                                              10    NumIATfeedback     Num       8
                                               4    NumIATinstruct     Num       8
                                               2    Numcami            Num       8
                                               3    Numconsent         Num       8
                                               8    Numdebrief         Num       8
                                               5    Numimc             Num       8
                                              11    Numpop_up_close    Num       8
                                               6    Numpost_consent    Num       8
                                               7    Numstartpage       Num       8
                                               1    Session_ID         Num       8
*/
/* Put both transposed tables in Session_ID order for the BY-merge that follows */
PROC SORT DATA=TimeTNum;
  BY Session_ID;
RUN;
PROC SORT DATA=TimeOnTasks;
  BY Session_ID;
RUN;

/***	
NOTE:	SAS's default Times/date formats are crazy: 
	They're the number of seconds since January 1st, 1960
	So if you do a difference score, you'll get the number of seconds, but you'll want to 
	  apply a format to make it easier to read.
	Check SAS help for mmss. format or time. format for more details about them. 
	Here are a few, with the raw value first, then the displayed value that each time format 
	  will result in:
		_Format_Raw #_	_Displays_
		 HHMM. 	53132 	14:46 
		 HOUR. 	53132 	15 
		 MMSS. 	53132 	885 
		 TIME. 	53132 	14:45:32 
 		TOD. 	53132 	14:45:32							   ***/

/* Combine start times (Start<task>) and task numbers (Num<task>) into one row per session */
DATA clean.TimeOnTasks;
  MERGE TimeOnTasks
        TimeTNum;
  BY Session_ID;
RUN;
/***	By specifying the format for the new Time variables before I set them equal to something
	SAS keeps them in the format I'd like 							  ***/
/*
                                      1    Session_ID           Num       8
                                      9    StartIAT             Num       8    DATETIME20.
                                     10    StartIATfeedback     Num       8    DATETIME20.
                                      4    StartIATinstruct     Num       8    DATETIME20.
                                      2    Startcami            Num       8    DATETIME20.
                                      3    Startconsent         Num       8    DATETIME20.
                                      8    Startdebrief         Num       8    DATETIME20.
                                      5    Startimc             Num       8    DATETIME20.
                                     11    Startpop_up_close    Num       8    DATETIME20.
                                      6    Startpost_consent    Num       8    DATETIME20.
                                      7    Startstartpage       Num       8    DATETIME20.
*/
/* Compute the time spent on each task as the difference between the start datetime of the
   next task and the start datetime of this task (datetime differences are in seconds).
   Which task follows IMC, IAT feedback and CAMI depends on the task number of the IAT
   instructions (4 or 5), so those three durations are set per ordering and stay missing for
   any other value. */
DATA clean.TimeOnTasks;
  SET clean.TimeOnTasks;
  /* Declared before assignment so the new variables carry this display format (Or TIME.) */
  FORMAT timetotal timeconsent timeimc timeIATinstruct timeIAT timeIATfeedback timecami  MMSS.;
  /* numconsent=1 numimc=3 numIATinstruct= 4/5 numIAT=5/6, numIATfeedback=6/7, numcami=4/7 */

  /* Durations that do not depend on the task ordering */
  timetotal       = Startdebrief      - Startconsent;
  timeconsent     = Startpost_consent - Startconsent;
  timeIATinstruct = StartIAT          - StartIATinstruct;
  timeIAT         = StartIATfeedback  - StartIAT;

  SELECT (numIATinstruct);
    WHEN (4) DO;   /* IMC -> IAT instructions ... IAT feedback -> CAMI -> pop_up_close */
      timeIMC         = StartIATinstruct  - Startimc;
      timeIATfeedback = Startcami         - StartIATfeedback;
      timecami        = Startpop_up_close - Startcami;
    END;
    WHEN (5) DO;   /* IMC -> CAMI -> IAT instructions ... IAT feedback -> pop_up_close */
      timeIMC         = Startcami         - Startimc;
      timeIATfeedback = Startpop_up_close - StartIATfeedback;
      timecami        = StartIATinstruct  - Startcami;
    END;
    OTHERWISE;     /* any other value: leave the three durations missing */
  END;
RUN;
/* Descriptives for the computed durations (full table, before trimming the columns) */
PROC MEANS DATA=clean.TimeOnTasks;
  VAR timetotal timeconsent timeimc timeIATinstruct timeIAT timeIATfeedback timecami;
RUN;

/* Reduce the permanent table to Session_ID plus the seven duration variables */
DATA Clean.TimeOnTasks;
  SET clean.TimeOnTasks(KEEP=Session_ID timetotal timeconsent timeimc timeIATinstruct
                             timeIAT timeIATfeedback timecami);
RUN;

PROC MEANS DATA=Clean.TimeOnTasks;
  VAR timetotal timeconsent timeimc timeIATinstruct timeIAT timeIATfeedback timecami;
  FORMAT timetotal timeconsent timeimc timeIATinstruct timeIAT timeIATfeedback timecami TIME.;
RUN;

/* Histograms only (NOPRINT suppresses the descriptive tables) */
PROC UNIVARIATE DATA=Clean.TimeOnTasks NOPRINT;
  VAR timetotal timeconsent timeimc timeIATinstruct timeIAT timeIATfeedback timecami;
  HISTOGRAM timetotal timeconsent timeimc timeIATinstruct timeIAT timeIATfeedback timecami;
  FORMAT timetotal timeconsent timeimc timeIATinstruct timeIAT timeIATfeedback timecami TIME10.;
RUN;

/***********           ************          ************          ************           *************
ST-Step D - Identifying experimental assignment
	  - This will require you to change the coding to fit your design, but this should give 
	     you an idea of what you need to do.
	  - New-model studies, WHERE Task_ID contains values, may be easier to decode
	  - Use this, rather recoding based on the Explicit datafile, because session_Task records 
	     experimental assignment for all observations, not just those in which participants 
	     reported at least one response!

NOTE:	For the research-site study from which this is taken, Task_number 1 always contained my 
	experimental manipulation.

%&&&&&&&&&&&&&  CHANGE THIS								  &&&&&&&&&&&&%

	NOTE: You MUST change this to fit your study design!!!
	This does not work with Demo (e.g., age data because experimental condition info is not
	given in Task_number=1, but does work with the research-site study from which it was adapted
************           ************          ************          ************           ************/
/*
PROC SORT DATA=Temp;BY Session_ID;RUN;
proc freq data=temp;tables task_URL;where task_number=1;run;
PROC TRANSPOSE DATA=Temp(WHERE=(Task_Number=1)) NAME=NAME OUT=expassign(DROP=NAME _LABEL_ 
RENAME=(_1=SurveyName)); BY Session_ID; 		VAR Task_URL; ID Task_Number;RUN;
*/
/*
DATA Clean.ExpAssign; SET ExpAssign;
*/
/***
NOTE:	This was written for a study where the SurveyName followed a set format, where all survey 
	  file names were of equal length, looking something like this: surveyblack2amerb
	Here are some helpful functions to recode this:
	 * SUBSTR(varname,NPosition,NLength) pulls a substring from VARNAME, beginning at NPOSITION,
	    and continues for NLENGTH characters (through the end of the string if NLENGTH is omitted)
	 * PUT(varname, FORMAT.) writes the values of VARNAME as text using the FORMAT
	    ($N. = character; N. = numeric); the result is always a character value. To read
	    character digits into a numeric variable, use INPUT(varname, N.) instead
	 * KINDEX(varname, "searchstring"): searches the contents of _varname_ for the provided 
	    "searchstring". Returns the position of the first match (nonzero, so it is true in an
	    IF), or 0 if it is not found. 							   ***/
/*
   FORMAT SName QOrder 8. ;
   FORMAT SEthnic SIdeol 7.1;
   *** codes for the first name of target counterbalancing; 
      SName = PUT(SUBSTR(SurveyName, 12,1),8.); 
   *** Or substitute a long IF-THEN-ELSE statement, beginning: 
	IF SUBSTR(Surveyname, 12,1)= "1" THEN Sname = 1;

   *** determine the target's apparent ethnicity;
      IF KINDEX(SurveyName, "black") THEN Sethnic =.5;
	ELSE IF kindex(SurveyName, "white") THEN Sethnic = -.5;
	ELSE Sethnic =.;

   *** codes for target's speech ideology;
      IF KINDEX(SurveyName, "amer") THEN SIdeol = -.5;
	ELSE IF KINDEX(SurveyName, "arab") THEN SIdeol = .5;
      ELSE SIdeol = .;

   *** codes for order of agree/disagree statements;
      IF SUBSTR(SurveyName,17) = "a" THEN QOrder = 0;
	ELSE IF SUBSTR(SurveyName,17) = "b" THEN QOrder = 1; 
      ELSE QOrder = .;
RUN;
PROC FREQ DATA=Clean.ExpAssign;TABLES Sname QOrder SEthnic SIdeol;RUN;
*/

/***********           ************          ************          ************           *************
S-Session Preparation Section (No valid demographics data)
   For Demo- or International-site tasks (with no signup-demographics). 
   It contains one line for each participant.

   IF YOU HAVE VALID DEMOGRAPHICS DATA (i.e., your study used the Research-site participant pool), 
	use the alternate Session Preparation section below.

   Also relevant for determining whether they saw all pages of the study through to the debriefing or
	other last page, in which case Session_Status would be = "C" for Complete (note: this doesn't
	assess whether they chose to *respond* to all items)
************           ************          ************          ************           ************/

/***	Step 0 - Bringing in the file;	***/
/*
DATA Clean.Sessions(DROP=Referrer Previous_Session_Schema Previous_Session_ID Referrer 
	Study_Url User_Agent);
  INFORMAT Session_Date Creation_Date Last_Update_Date DATETIME20.;
  FORMAT Session_Date Creation_Date Last_Update_Date DATETIME20.;
  INFILE Raw(sessions.txt) 	DELIMITER='09'x LRECL=2000 FIRSTOBS  =  2  ;
  INPUT Session_ID User_ID Study_Name:$48. Session_Date:ANYDTDTM21. 
	Session_Status:$4. Creation_Date:ANYDTDTM21. Last_Update_Date:ANYDTDTM21.
	Previous_Session_ID Previous_Session_Schema $ Referrer $ Study_URL :$48. User_Agent :$8.;
if Session_Status = "null" then Session_Status = "M";
RUN;



PROC SORT DATA=Clean.sessions; BY Session_ID;RUN;
/***	Session data is now cleaned and sorted for merging with other data;
	Note: Transposing is not necessary because there is only 1 line per participant		   ***/


/***********           ************          ************          ************           *************
S-ALTERNATE Session Preparation Section:
***for Research-site data with Demographics only***
   Primarily used with research-site data, to match up user_id (in demographics data) to the 
	session_id (specific to this study, and is used in all other files).

   This section doesn't match perfectly with the rest of the analysis script because this is from a
	different dataset (a research-site study). They're similar enough that you shouldn't have
	problems merging them 
************           ************          ************          ************           ************/

/***	Step 0 - Bringing in the Sessions file							   ***/
/***
NOTE:	The alteration is that this doesn't drop User_ID, so that it can be match-merged with
	the transposed demographics datafile below						   ***/
/***	Reads the tab-delimited sessions export (header row skipped) into clean.Sessions.
	User_ID is kept so the demographics can be match-merged on it below.
	NOTE(review): without DSD, two adjacent tabs are treated as a single delimiter; this
	  presumably relies on the export writing the text null for empty fields - confirm.   ***/
DATA clean.Sessions(DROP=Referrer Previous_Session_Schema Previous_Session_ID 
	Study_Url User_Agent);
  INFORMAT Session_Date Creation_Date Last_Update_Date DATETIME20.;
  FORMAT Session_Date Creation_Date Last_Update_Date DATETIME20.;
  /* MISSOVER (as in the Explicit import): a record with fewer fields than the INPUT list
     gets missing values instead of pulling fields from the next line of the file */
  INFILE Raw(sessions.txt) 	DELIMITER='09'x LRECL=2000 FIRSTOBS  =  2  MISSOVER;
  INPUT Session_ID User_ID Study_Name:$48. Session_Date:ANYDTDTM21. 
	Session_Status:$4. Creation_Date:ANYDTDTM21. Last_Update_Date:ANYDTDTM21.
	Previous_Session_ID Previous_Session_Schema $ Referrer $ Study_URL :$48. User_Agent :$8.;
  /* the literal text null in the status column is recoded to M */
  IF Session_Status = "null" THEN Session_Status = "M";
RUN;
/***	Session data is now cleaned and sorted for merging with the demographics;
NOTE: Transposing is not necessary because there is only 1 line per participant		  	 ***/


/***********           ************          ************          ************           *************
D-Demographics Preparation Section:
	This file will only be valid (unique values for User_ID, not 0s or -1s) for studies with 
	   samples from the research-site participant pool. 

THAT MEANS: 
	* Not relevant for Demo/International-site studies
	* Unlikely to be relevant for contract studies
************           ************          ************          ************           ************/
/*
ODS HTML CLOSE;

/***	Step 0 - Bringing in the demographics file 						   ***/
ODS HTML PATH=WebOut FILE="Prep.SessDemo.01.Cleaning.Demographics.htm";

/***	Reads the long-format demographics export: one row per User_ID x Characteristic, with
	the answer held as text in Value.							   ***/
DATA Temp;
   /* MISSOVER (as in the Explicit import): a short record yields missing values instead of
      INPUT continuing onto the next line of the file */
   INFILE Raw(demographics.txt) 	DELIMITER='09'x LRECL=2000 FIRSTOBS  =  2  MISSOVER;
   INPUT Characteristic:$16.  Value:$16. User_ID  study_name:$48.;
   /* the literal text null becomes a period, which reads as missing when the value is later
      converted to numeric */
   IF Value = "null" THEN Value = ".";
RUN;

/* this sort order is what the duplicate check and the transpose below depend on */
PROC SORT DATA = Temp; BY User_ID Characteristic; RUN;

/***	Step 1 - Cleaning Repeat data;
	Get rid of duplicate submissions to the dataset of the same data; 			   ***/
/* Flags every row after the first within each User_ID x Characteristic group as a Repeat.
   BY-group FIRST. processing replaces the LAG() comparisons: each LAG() keeps its own queue
   that only advances when that call actually executes, which makes LAG inside a compound IF
   condition fragile. Requires Temp to be sorted BY User_ID Characteristic (done above). */
DATA Temp;
   SET Temp;
   BY User_ID Characteristic;
   Repeat = (NOT FIRST.Characteristic);	/* 0 = first row of its group, 1 = duplicate */
RUN;

TITLE2 'Repeat Demographics Observations';
PROC FREQ DATA=Temp; TABLES Repeat; RUN; 
/***	Drop duplicate lines of data								   ***/
DATA Temp; SET Temp; IF Repeat = 1 THEN DELETE; DROP Repeat; RUN;

/***	Step 2 - Transposing Demographics data 					 		   ***/
/* Long to wide: one output row per User_ID; each Characteristic becomes a column named
   Old<characteristic> that holds the corresponding Value */
PROC TRANSPOSE DATA=Temp
		OUT=Temp1(DROP= _LABEL_ NAME)
		NAME=NAME
		PREFIX=Old;
	BY User_ID;
	ID Characteristic;
	VAR Value;
RUN;

/***	Step 3 - Merging Demographics & Sessions data;
	Merging, based on User_ID (Demographics has only User_ID, other "real" files use only 
	  Session_ID, so we need Sessions data to identify participants session_ID.
	  Drop the User_ID after this. 

	Also necessary to bring in Session_Status and especially Session_Date, so that participants'
	  age on the date of the session can be computed 					   ***/
/* Both inputs must be in User_ID order for the match-merge.
   Note that this leaves clean.sessions itself sorted by User_ID. */
PROC SORT DATA=clean.sessions;
	BY User_ID;
RUN;
PROC SORT DATA=Temp1;
	BY User_ID;
RUN;

/* Attach each user's transposed demographics to their session row(s) */
DATA clean.TempDemos;
	MERGE clean.sessions Temp1;
	BY User_ID;
RUN;
/***	Step 4 - Dropping all non-consenters
	Retaining demographics data only for consenting participants, so that the number of valid
	  sessions isn't artificially inflated by the number of participants assigned to the study
	  who chose not to participate.
NOTE:	
This requires you to follow the step in the SessionTask preparation section to identify
	the consenters (i.e., Clean.GaveConsent)						   ***/
/********
Fred will do this step manually, as I want to be able to assess demographic-indicators of 
   non-consent
/********
PROC SORT DATA=clean.TempDemos; BY Session_ID;RUN;
PROC SORT DATA=Clean.GaveConsent; BY Session_ID;RUN;
DATA Temp(WHERE=(Consenter = 1));MERGE clean.TempDemos Clean.GaveConsent;BY Session_ID;
RUN;
*/
/* Working copy of the merged sessions+demographics data, plus its variable listing */
DATA temp;
	SET clean.TempDemos;
RUN;
PROC CONTENTS DATA = temp;
RUN;
/*
                               #    Variable            Type    Len    Format         Informat

                               2    Creation_Date       Num       8    DATETIME20.    DATETIME20.
                               3    Last_Update_Date    Num       8    DATETIME20.    DATETIME20.
                               8    Oldbirthmonth       Char     16
                               9    Oldbirthyear        Char     16
                              10    Oldcitizenship      Char     16
                              11    Oldclass            Char     16
                              23    Olddayofbirth       Char     16
                              12    Oldeducation        Char     16
                              13    Oldengfluency       Char     16
                              14    Oldethnicity        Char     16
                              24    Oldethnicityomb     Char     16
                              25    Oldgenoccupation    Char     16
                              15    Oldincome           Char     16
                              26    Oldmajor            Char     16
                              16    Oldoccupation       Char     16
                              17    Oldpoliticalid      Char     16
                              27    Oldraceomb          Char     16
                              29    Oldreldenom         Char     16
                              28    Oldrelfamily        Char     16
                              18    Oldreligion         Char     16
                              19    Oldreligionid       Char     16
                              20    Oldresidence        Char     16
                              21    Oldsex              Char     16
                              22    Oldzipcode          Char     16
                               1    Session_Date        Num       8    DATETIME20.    DATETIME20.
                               4    Session_ID          Num       8
                               7    Session_Status      Char      4
                               6    Study_Name          Char     48
                               5    User_ID             Num       8
*/
/***	Step 5 - Reducing the size of these variables, to save room. In SAS, this requires that a new
	  variable be created, with a shorter length (or numeric instead of character).	Note that in
	  the PROC TRANSPOSE above, I added the prefix "Old" to all transposed variables so that I
	  don't have to rename the shortened variables here, or choose new variable names for the
	  shortened ones.									   ***/
/***NOTE:
	Cleaning demographics data only for consenting participants				   ***/

/***	Builds clean.Demos from Temp: a SAS date Birth plus short or numeric copies of the
	transposed Old* character columns. The Old* originals are dropped on output, except the
	three religion columns, which are simply renamed.					   ***/
DATA clean.Demos(DROP=User_ID _LABEL_ NAME OldBirthMonth OldBirthYear OldCitizenship OldClass 
	OldDayofBirth OldEducation OldEngFluency OldEthnicity OldIncome OldMajor OldOccupation 
	OldPoliticalID OldRaceOMB  OldEthnicityOMB OldGenoccupation OldReligionID OldResidence 
	OldSex OldZipcode
	RENAME = (OldRelDenom=RelDenom OldRelFamily=RelFamily OldReligion=Religion ));
   SET Temp;
   FORMAT Birth MMDDYYN.; 
   DROP _BYear _BMonth _BDay _LeapYear;	/* scratch variables used only to build Birth */
/***NOTE
	on date format: Birth is built as a SAS date so that age on the session_date can be
	computed. Older data will not include the DayofBirth variable. For those people the
	birthday is assumed to fall on the last day of the birth month (the 28th for February),
	and on December 31 when the month is missing as well.					   ***/
   /* The Old* values are text, so they are read into numbers with INPUT (PUT goes the other
      way, number to text). ?? keeps blank, period or unparseable values quietly missing. */
   _BYear  = INPUT(OldBirthYear,  ?? 16.);
   _BMonth = INPUT(OldBirthMonth, ?? 16.);
   _BDay   = INPUT(OldDayOfBirth, ?? 16.);
   IF NOT MISSING(_BYear) THEN DO;
      IF OldDayOfBirth NOT IN ('','.') THEN DO; 
	/* Participant error/spoofing: Feb 29 reported for a year that is not a leap year.
	   Full Gregorian rule: divisible by 4, except century years not divisible by 400. */
	_LeapYear = (MOD(_BYear,4) = 0 AND MOD(_BYear,100) NE 0) OR MOD(_BYear,400) = 0;
	IF _BMonth = 2 AND _BDay = 29 AND NOT _LeapYear THEN _BDay = 28;
	IF OldBirthMonth NOT IN ('','.') THEN Birth = MDY(_BMonth, _BDay, _BYear);
	ELSE Birth = MDY(12, _BDay, _BYear);	/* day known, month missing: assume December */
      END;
      /* day of birth missing: use the last day of the reported month */
      ELSE IF _BMonth = 2 				THEN Birth = MDY(2, 28, _BYear);
      ELSE IF _BMonth IN (1, 3, 5, 7, 8, 10, 12) 	THEN Birth = MDY(_BMonth, 31, _BYear);
      ELSE IF _BMonth IN (4, 6, 9, 11) 		THEN Birth = MDY(_BMonth, 30, _BYear);
      ELSE IF OldBirthMonth IN ('','.') 		THEN Birth = MDY(12, 31, _BYear);
   END;
   FORMAT Edu Class EngFluency Ethnic EthnicityOMB RaceOMB Income PoliticalID RelID Major 8.; 
/***NOTE:
	Numeric responses: the FORMAT statement above defines these new variables as numeric.
	  INPUT then reads the character Old* value into them explicitly rather than relying on
	  automatic character-to-numeric conversion. Values that cannot be read are still
	  reported in the log.								   ***/
   Edu = INPUT(OldEducation, 16.); Class = INPUT(OldClass, 16.); 
   EngFluency = INPUT(OldEngFluency, 16.);
   Ethnic = INPUT(OldEthnicity, 16.);
   EthnicityOMB = INPUT(OldEthnicityOMB, 16.); 
   RaceOMB = INPUT(OldRaceOMB, 16.);
/***NOTE:
	The way we recorded race changed abruptly around September 27, 2006, because NIH indicated
   	that we should request it using the categories used in the census. See the wiki section:
   	https://www.projectimplicit.net/imptrac/wiki/EthnicityDetails for details. If you have
   	some participants who reported ethnicity and others who reported EthnicityOMB/RaceOMB
   	then you will need to recode it (see create_common_dir._format-label_PI_vars.sas on the
   	wiki)										   ***/
   Income = INPUT(OldIncome, 16.); PoliticalID = INPUT(OldPoliticalID, 16.); 
   RelID = INPUT(OldReligionID, 16.); Major = INPUT(OldMajor, 16.);
   FORMAT Sex $1.; Sex = OldSex;
   FORMAT CountryCit CountryRes $2.; CountryCit = OldCitizenship; CountryRes = OldResidence;

   /* Keeps only the digits of the zipcode before converting it to a number. 
      Do not use this if you want international "zip"codes. */
   FORMAT Zipcode z8.;	Zipcode = INPUT(COMPRESS(OldZipcode,'0123456789','k'), 8.);
   FORMAT Occupation $9.; 		Occupation = TRIM(OldOccupation);
   FORMAT GenOccupation $9.; 	Genoccupation = TRIM(OldGenOccupation);
RUN;
/***	Adds Age = completed years between Birth and the date part of Session_Date.
NOTE:
	INTCK('MONTH',begin,end) counts how many first-of-month boundaries lie between the two
	  dates, NOT the number of full months elapsed. Jan 31 to Feb 1 counts as 1 month.
	  Dividing that count by 12 therefore ages a participant up by a year whenever the
	  session falls in the birth month but before the birthday.
	The fix subtracts one month when the session's day-of-month is earlier than the birth
	  day-of-month, which gives the number of fully completed months. FLOOR of months/12
	  is then the age in completed years (29 years and 11 months stays 29).		   ***/
DATA Clean.Demos;
   SET clean.Demos; 
   FORMAT Age 8.0;
   DROP _SessDate;
   /* Age stays missing when either date is unavailable */
   IF NOT MISSING(Birth) AND NOT MISSING(Session_Date) THEN DO;
      _SessDate = DATEPART(Session_Date);
      Age = FLOOR( (INTCK('MONTH', Birth, _SessDate) - (DAY(_SessDate) < DAY(Birth))) / 12 );
   END;
RUN;
ODS HTML CLOSE;
/***	Demographics & Session data are now cleaned and sorted for merging with other data	   ***/

PROC SORT DATA = clean.demos; BY session_id; RUN;

/***********           ************          ************          ************           *************
E-Explicit Preparation Section 
************           ************          ************          ************           ************/

/***	Step 0 - Bringing in the explicit file 							   ***/
/* Reads the tab-delimited explicit (questionnaire) export, skipping the header row.
   Every answer arrives as text in Question_Response; MISSOVER fills short records with
   missing values. */
DATA Explicit;
	INFILE Raw(explicit.txt) 	DELIMITER='09'x LRECL=2000 FIRSTOBS  =  2  MISSOVER;
	INPUT
		Task_Number
		Question_Number
		Questionnaire_Name	:$9.
		Question_Name		:$14.
		Question_Response	:$150.
		Attempt
		Study_Name		:$48.
		Session_ID
	; 
RUN;

/***	Step 1 - Cleaning Repeat data;
	Get rid of duplicate submissions to the dataset of the same data; 			   ***/

ODS HTML CLOSE;
ODS HTML PATH=WebOut FILE="Prep.Exp.01.Repeat.htm";
/* Variable listing and non-missing counts for the freshly imported Explicit data */
PROC CONTENTS DATA=Explicit;
RUN;
PROC MEANS DATA=Explicit N MAXDEC=0;
RUN;

/* Working copy, so the imported Explicit dataset itself stays untouched */
DATA temp;
	SET explicit;
RUN;

PROC SORT DATA=Temp; BY Session_ID Questionnaire_Name Question_Name;RUN;  *** Sorting data;

/* Flags every row after the first within each Session_ID x Questionnaire_Name x Question_Name
   group as a Repeat, so duplicate IDs do not break the transpose. BY-group FIRST. processing
   replaces the three LAG() comparisons: each LAG() keeps its own queue that only advances
   when that call executes, which makes LAG inside a compound IF condition fragile. */
DATA Temp;
   SET Temp;
   BY Session_ID Questionnaire_Name Question_Name;
   Repeat = (NOT FIRST.Question_Name);	/* 0 = first row of its group, 1 = duplicate */
RUN;

TITLE2 "Repeat Explicit observations";
PROC FREQ DATA=Temp; TABLES Repeat;RUN;
***	Drop all Repeat data;  
DATA Temp; SET Temp;IF Repeat = 1 THEN DELETE; DROP Repeat; 
RUN;

ODS HTML CLOSE;
ODS HTML PATH=WebOut FILE="Prep.Exp.02.Transposing.Explicit.htm";
/***	Step 2 - Transpose Explicit data							   ***/
/***	Step 2a - Transpose Explicit: convert numeric variables to numeric format
  		- This separates out the numeric questions. Run a PROC FREQ on the Question_Name
		   variable to identify them, so that Question_Response can be forcibly converted
		   to numeric *before* transposition. This is necessary in SAS; otherwise every
		   numeric variable would have to be converted individually after transposing,
		   as is done for the character variables.					   ***/
PROC FREQ DATA = temp;
	TABLES question_name;
RUN;
/*
                                Question_                              Cumulative    Cumulative
                                Name          Frequency     Percent     Frequency      Percent
                                ---------------------------------------------------------------
                                cami1              519        2.72           519         2.72
                                cami10             519        2.72          1038         5.44
                                cami10rt           519        2.72          1557         8.15
                                cami10trt          519        2.72          2076        10.87
                                cami1rt            519        2.72          2595        13.59
                                cami1trt           519        2.72          3114        16.31
                                cami2              519        2.72          3633        19.03
                                cami2rt            519        2.72          4152        21.74
                                cami2trt           519        2.72          4671        24.46
                                cami3              519        2.72          5190        27.18
                                cami3rt            519        2.72          5709        29.90
                                cami3trt           519        2.72          6228        32.62
                                cami4              519        2.72          6747        35.33
                                cami4rt            519        2.72          7266        38.05
                                cami4trt           519        2.72          7785        40.77
                                cami5              519        2.72          8304        43.49
                                cami5rt            519        2.72          8823        46.21
                                cami5trt           519        2.72          9342        48.92
                                cami6              519        2.72          9861        51.64
                                cami6rt            519        2.72         10380        54.36
                                cami6trt           519        2.72         10899        57.08
                                cami7              519        2.72         11418        59.80
                                cami7rt            519        2.72         11937        62.51
                                cami7trt           519        2.72         12456        65.23
                                cami8              519        2.72         12975        67.95
                                cami8rt            519        2.72         13494        70.67
                                cami8trt           519        2.72         14013        73.39
                                cami9              519        2.72         14532        76.10
                                cami9rt            519        2.72         15051        78.82
                                cami9trt           519        2.72         15570        81.54
                                d                  507        2.66         16077        84.19
                                feedback           507        2.66         16584        86.85
                                omdimc2            668        3.50         17252        90.35
                                omdimc2rt          668        3.50         17920        93.85
                                omdimc2trt         668        3.50         18588        97.34
                                text               507        2.66         19095       100.00
*/ 
/* Keep only the questions with numeric answers: everything except the free-text items
   feedback, text and d */
DATA ExplicitNum;
	SET Temp;
	WHERE Question_Name NOT IN ('feedback', 'text', 'd');
RUN;
PROC FREQ DATA = ExplicitNum;
	TABLES question_name;
RUN;

/* Move the text answer into question_response_s so the name Question_Response can be reused
   for the numeric version created in the next step */
DATA ExplicitNum; SET ExplicitNum;
 question_response_s = question_response;
 DROP question_response;
RUN;
PROC CONTENTS DATA=ExplicitNum; RUN;

/* Text to number. INPUT reads the text value as a number. The previous PUT(...,8.) call only
   copied the first 8 characters before an implicit conversion, so longer numeric strings
   were silently truncated. ?? makes non-numeric text (for example the word null) a quiet
   missing value. */
DATA ExplicitNum1(DROP=Question_Response_S);SET ExplicitNum;
FORMAT Question_Response 8.;
Question_Response = INPUT(Question_Response_S, ?? 32.);
RUN;
PROC CONTENTS DATA=ExplicitNum1;RUN;

/***	Transpose data to get questions and answers						   ***/
/* Long to wide: one output row per Session_ID, one numeric column per Question_Name that
   holds the corresponding Question_Response */
PROC TRANSPOSE DATA=ExplicitNum1
		OUT=ExplicitNum(DROP=_LABEL_ NAME)
		NAME=NAME;
	BY Session_ID;
	ID Question_Name;
	VAR Question_Response;
RUN;
PROC CONTENTS DATA = explicitnum;
RUN; *one line per session;

 /***	Step 2b - Transpose Explicit character variables & shorten				   ***/
 /* Retain the feedback */

/* Character answers: only the feedback item is retained */
DATA ExplicitChar;
	SET Temp(WHERE=(Question_Name IN ('feedback')));
RUN;
PROC CONTENTS DATA=ExplicitChar;
RUN;

/* Long to wide: one row per Session_ID; the feedback text lands in the column Oldfeedback */
PROC TRANSPOSE DATA=ExplicitChar
		OUT=ExplicitChar1(DROP=_LABEL_ NAME)
		NAME=NAME
		PREFIX=Old;
	BY Session_ID;
	ID Question_Name;
	VAR Question_Response;
RUN; 
PROC CONTENTS DATA=ExplicitChar1;
RUN;
PROC PRINT DATA=ExplicitChar1;
	VAR oldfeedback;
RUN;

/* Copy Oldfeedback into feedback and drop the prefixed original */
DATA ExplicitChar(DROP=oldfeedback);
	SET ExplicitChar1;
	feedback = oldfeedback;
RUN;
PROC CONTENTS DATA=ExplicitChar;
RUN;
PROC CONTENTS DATA=ExplicitNum;
RUN;

/* Stand-alone copy of the feedback text (data file for Andrew) */
DATA Clean.feedback;
	SET ExplicitChar;
RUN;

PROC SORT DATA=ExplicitChar;
	BY session_id;
RUN;
PROC SORT DATA=ExplicitNum;
	BY session_id;
RUN;

/*PROC CONTENTS data=Explicit;RUN; */
/* Numeric and character answers side by side, one row per session */
DATA Clean.Explicit;
	MERGE ExplicitNum ExplicitChar;
	BY session_id;
RUN;
PROC CONTENTS DATA=Clean.Explicit;
RUN;
/***	Explicit data is now cleaned and sorted for merging with other data;			   ***/

/***********           ************          ************          ************           *************
I-IAT Preparation Section 
************           ************          ************          ************           ************/

/*** Step 0 - Bringing in the file								   ***/
/* Reads the tab-delimited IAT trial-level export (header row skipped), one row per trial */
DATA Temp;
	/* MISSOVER (as in the Explicit import): a record with fewer fields than the INPUT list
	   gets missing values instead of pulling fields from the next line of the file */
	INFILE Raw(iat.txt) 	DELIMITER='09'x LRECL=2000 FIRSTOBS  =  2  MISSOVER;
	INPUT Block_Number /* Block in the IAT, usually from 0 - 6; for the SPF, 0-3 */
		Block_Name $ /* Usually has the format BLOCKn, e.g. BLOCK3 (default length: 8 characters) */
		Block_Trial_count /* Total N in this block, usually 40 or 20 */
		Block_Pairing_Definition :$35. /* Short/nonexistent for SPF, Category labels for IAT
			(if yours has longer labels, you may need to adjust length ) */
		Study_Name:$48. /* Same as in the other files. May need to adjust length */
		Task_Number /* I think this should match with # in SessionTasks?, as
				its sequence in entire study  */
		Task_Name:$16. /* usually IAT version (e.g., agea, ageb agec, aged) */
	        Trial_Number 
		Trial_Name:$40. /* Stimulus name (e.g., picture name or word) */
		Trial_Response:$26. 
		Trial_Latency /* In milliseconds */
		Trial_Error /* See macro--logical value 0 or 1, comparing trial name & response */
		Session_ID;
RUN;
PROC CONTENTS DATA = temp; RUN;	

/***	Sometimes a datapoint is inadvertently sent to the database twice for the same Task;
	This section will sort the data, identify duplicates, and remove them 			   ***/

/*** Step 1 - Cleaning Repeat data								   ***/
ODS HTML CLOSE;
ODS HTML PATH=WebOut FILE="Prep.IAT.01.Repeat.htm";
TITLE1 "Menatti";

PROC SORT DATA=Temp; BY Session_ID Task_NAME Block_NAME Trial_Number; RUN;

/* Flags every row after the first within each Session_ID x Task_NAME x Block_NAME x
   Trial_Number group as a Repeat. This assumes that the same Task_name is not administered
   twice in a session. BY-group FIRST. processing replaces the four LAG() comparisons: each
   LAG() keeps its own queue that only advances when that call executes, which makes LAG
   inside a compound IF condition fragile. */
DATA Temp;
   SET Temp;
   BY Session_ID Task_NAME Block_NAME Trial_Number;
   Repeat = (NOT FIRST.Trial_Number);	/* 0 = first row of its group, 1 = duplicate */
RUN;

TITLE2 "Repeat IAT observations";
PROC FREQ DATA=Temp; TABLES Repeat;RUN;

/* Remove the rows flagged as duplicates; the Repeat flag itself is not kept */
DATA Temp(DROP=Repeat);
	SET Temp;
	IF Repeat = 1 THEN DELETE;
	/* The IAT scoring macro expects Trial_Error to be numeric, not character. The conversion
	   below stays disabled because Trial_Error was already numeric in the Nobels
	   genderscience data:
	       Trial_Error = MEAN(TrlError);                                                  */
RUN;
	
TITLE2 "IAT CONTENTS";
PROC CONTENTS DATA=Temp;
RUN;    

/***	Step 2	- initial data cleaning checks 
		- what versions of the IAT were used in this dataset?
		- Use the information here to take care of any stray sessions (from other studies) and
		    later to reverse-code the IAT						   ***/
ODS HTML CLOSE;
ODS HTML PATH=WebOut FILE="Prep.IAT.02.Design.Details.htm";

/* One row per block name, counted on each block's first trial */
TITLE2 "Number of Blocks";
PROC MEANS DATA=Temp N MEAN MIN MAX;
	WHERE Trial_Number = 1;
	CLASS Block_NAME;
	VAR Session_ID;
RUN; 
/*
                           Block_
                           Name        N Obs        N            Mean         Minimum         Maximum
                           --------------------------------------------------------------------------
                           BLOCK0        539      539      1475270.91      1470551.00      1478933.00

                           BLOCK1        532      532      1475268.80      1470551.00      1478933.00

                           BLOCK2        528      528      1475263.06      1470551.00      1478933.00

                           BLOCK3        523      523      1475258.68      1470551.00      1478933.00

                           BLOCK4        518      518      1475252.48      1470551.00      1478933.00

                           BLOCK5        514      514      1475256.64      1470551.00      1478933.00

                           BLOCK6        509      509      1475246.46      1470551.00      1478933.00
*/
/* Block name crossed with its declared trial count, counted on each block's first trial */
TITLE2 "Trials per Block";
PROC MEANS DATA=Temp N MEAN MIN MAX;
	WHERE Trial_Number = 1;
	CLASS Block_NAME Block_Trial_Count;
	VAR Session_ID;
RUN;
/*
                   Block_      Block_Trial_
                   Name               count    N Obs        N            Mean         Minimum         Maximum
                   ------------------------------------------------------------------------------------------
                   BLOCK0                12      539      539      1475270.91      1470551.00      1478933.00

                   BLOCK1                12      532      532      1475268.80      1470551.00      1478933.00

                   BLOCK2                12      528      528      1475263.06      1470551.00      1478933.00

                   BLOCK3                24      523      523      1475258.68      1470551.00      1478933.00

                   BLOCK4                12      518      518      1475252.48      1470551.00      1478933.00

                   BLOCK5                12      514      514      1475256.64      1470551.00      1478933.00

                   BLOCK6                24      509      509      1475246.46      1470551.00      1478933.00
*/
/* Which category pairings occur in the data, counted on each block's first trial */
TITLE2 "Block Pairings";
PROC MEANS DATA=Temp N MIN MAX MAXDEC=0;
	WHERE Trial_Number = 1;
	CLASS Block_Pairing_Definition;
	VAR Session_ID;
RUN;
/*
                     Block_Pairing_Definition               N Obs        N         Minimum         Maximum
                     -------------------------------------------------------------------------------------
                     Bad,Good                                 522      522         1470551         1478933

                     Good,Bad                                 528      528         1470551         1478933

                     Physically Ill People,Mentally Ill       539      539         1470551         1478933

                     Physically Ill People/Bad,Mentally      1031     1031         1470551         1478933

                     Physically Ill People/Good,Mentally     1043     1043         1470551         1478933
*/
/* Which stimulus items (Trial_NAME) occur in the data, across all trials */
TITLE2 "Stimulus Items";
PROC MEANS DATA=Temp N MEAN MIN MAX MAXDEC=0;
	CLASS Trial_NAME;
	VAR Session_ID;
RUN; 
/*
           Trial_Name                                  N Obs        N            Mean         Minimum         Maximum
           ----------------------------------------------------------------------------------------------------------
           excellent                                    5206     5206         1475257         1470551         1478933

           great                                        5206     5206         1475257         1470551         1478933

           horrible                                     5206     5206         1475257         1470551         1478933

           nasty                                        5206     5206         1475257         1470551         1478933

           person with anxiety                          4184     4184         1475259         1470551         1478933

           person with appendicitis                     4184     4184         1475259         1470551         1478933

           person with cancer                           4184     4184         1475259         1470551         1478933

           person with depression                       4184     4184         1475259         1470551         1478933

           person with pneumonia                        4184     4184         1475259         1470551         1478933

           person with schizophrenia                    4184     4184         1475259         1470551         1478933

           terrible                                     5206     5206         1475257         1470551         1478933

           wonderful                                    5206     5206         1475257         1470551         1478933
*/
/***NOTE:
	It is likely that there will be some errant blocks and stimulus items from other 
	   Tasks/studies because of miscoding; these will be eliminated in subsequent phases;	   ***/

/***	Step 3 - Analysis with the SAS IAT macro for scoring					   ***/
/***	Step 3a - Preparing the datafile for the macro					    	   ***/
/* Raw is the dataset handed to the scoring steps below; here it is a plain copy of Temp.
   The pairing filter from the study this script was adapted from (it dropped pairs that
   were either incorrect or added since the Nobel version began) is not relevant here and
   stays disabled:
     IF Block_Pairing_Definition NOTIN 
      ('Female/Liberal Arts,Male/Science','George W. Bush/Good,Thomas Jefferso',
	   'Liberal Arts,Science','Male/Liberal Arts,Female/Science','  Science,Liberal Arts');
*/
DATA Raw;
	SET Temp;
RUN;
/* Re-check the pairings that remain in Raw */
PROC MEANS DATA=Raw N MIN MAX MAXDEC=0;
	WHERE Trial_Number = 1;
	CLASS Block_Pairing_Definition;
	VAR Session_ID;
RUN;
/*
                     Block_Pairing_Definition               N Obs        N         Minimum         Maximum
                     -------------------------------------------------------------------------------------
                     Bad,Good                                 522      522         1470551         1478933

                     Good,Bad                                 528      528         1470551         1478933

                     Physically Ill People,Mentally Ill       539      539         1470551         1478933

                     Physically Ill People/Bad,Mentally      1031     1031         1470551         1478933

                     Physically Ill People/Good,Mentally     1043     1043         1470551         1478933
*/
/* Translate the export's block names into the Block codes used by the scoring macro and
   keep only the variables needed from here on. */
DATA Raw;
	SET Raw;
	/* IF trial_number < 2 THEN DELETE; * this instruction from Sriram was based on 
	   programming used in the Professions BIAT used in the Engineering studies, not here */
	SELECT (Block_NAME);
		WHEN ('BLOCK2') Block = 'B3';
		WHEN ('BLOCK3') Block = 'B4';
		WHEN ('BLOCK5') Block = 'B6';
		WHEN ('BLOCK6') Block = 'B7';
		OTHERWISE;	/* any other block: Block stays blank and the row is kept */
	END;

	KEEP Trial_Number Block Session_ID Study_Name Block_Pairing_Definition Trial_Latency
	     Trial_Error Task_NAME;
RUN;

/***	Step 3b	- Calling the SAS macro
		- Rules defined and summarized in the Macro explanation script. If your data are
		    formatted/named differently from Project Implicit sites' default (e.g., if you are
		    analyzing data from a lab study), then see the macro documentation, especially
		    (a) and (b)
		- If SAS is not configured to automatically search for macros in the folder where you
		    saved the  sasMacro script (available for download from: 
		    http://projectimplicit.net/nosek/papers/scoringalgorithm.sas.txt ),
	 	   then open the file, highlight and run the %IatCalc + %IatAlgorithm macro, so that 
		   it is  compiled and ready for use in Steps 3b and 4.
		- If you're new to analyzing IAT data, double-check the SAS macro's documentation,
		    so that your  variables are in the correct (numeric, not character) format, and
		    that the variable coding  matches the expected format (e.g., Trial_Error is coded
		    as 1 for error, 0 for correct trials).
REMEMBER: This simply cleans the IAT data and calculates a D score as the performance difference 
		average(blocks 6-3, 7-4), YOU ARE RESPONSIBLE for knowing what those blocks refer to
		for each participant (if you follow standard IAT practice, then you will need to
	 	reverse-code the IAT score for those who completed the stereotype-incompatible pairing
	 	in Blocks 3 & 4. See Step 7 of this IatPreparation section for details
************           ************          ************          ************           ************/

/***	Calling the macro-Variables are indicated in the following format:
%iatAlgorithm(libin, libOut, indata, outdata, BLOCname, SESHID, Trial_Latency, Trial_Error, 
		VERROR, VEXTREME, VSTD);							  ***/

/* Fred plugged in macro here */
/* IAT SCORING ALGORITHM FROM GREENWALD, NOSEK, & BANAJI, 2003
LAST UPDATE TO MACRO: 05/27/05

NOTE: this documentation is a block comment on purpose.  In an asterisk-style comment the
macro processor still scans the text, so the %iatCalc examples below would be executed
whenever the macro is already compiled (for example when this script is re-run in the
same SAS session).

--------------------------------------
Most recent changes:
- 5/27/05: Macro will now output the number of trials used to calculate the mean for each block
- 5/27/05: Lots of updating to the introductory and explanatory text
- 5/14/05: Macro will now analyze non-critical blocks (blocks other than B3, B4, B6, B7) such as
           single dimension practice trial data and output their means, percent error, and
           percent fast responses
- 5/14/05: Setting SUBEXCL to '2' (missing data) only occurs if there is missing data from the
           four critical analysis blocks
--------------------------------------

This macro will transform a datafile with test trial latencies (stored as one line per response) 
for a standard format IAT (7 blocks) into a one line summary per subject of the IAT effect using
GNB's new scoring algorithm. The goal of this macro is to prepare IAT data for subsequent analysis.
However, this does not relieve the researcher from making conceptual decisions about how best to
analyze IAT data.  There are decisions to make about how the macro is applied, and the macro 
does not remove participants.  All subject exclusions must be made deliberately by the researcher.

To use this algorithm for your SAS program, perform these steps:  
(1) Run this script (do not edit the macro directly).  The macro will be loaded into active 
    memory and can be referred to in any analysis script
(2) Turn the datafile containing your IAT data into a SAS datafile and put it into a folder to
    use as a library
(3) Identify that folder as a library and a folder where the SAS file output from the macro 
    will go (they can be the same folder)

EXAMPLE (without semicolons):
libname web 'H:\raceatt\'
libname outdata 'H:\raceattafter5\'

(4) Prepare your SAS datafile to be used by the SAS Macro.  The critical elements are:
    (a) If there are multiple IATs per participant,
        then the macro will need to be run for each IAT individually and the data can be 
        merged manually afterwards
    (b) the dataset must contain one row per IAT trial
    (c) the following variables must be available in each row: subject identifier, trial latency,
        trial error (0=correct, 1=error), name of block = B3, B4, B6, B7 (corresponding to the
        3,4,6,7 blocks in the standard 7-block IAT format)
    (d) the macro has no idea what task subjects were performing in the blocks, it just 
        calculates the performance difference average(6-3, 7-4), you are responsible for 
        knowing what those blocks refer to for each participant

EXAMPLE (without semicolons): BLOCK_NAME is an existing variable defining what is in each block,
BLOCK is the new variable that will be passed to the macro.
     if BLOCK_NAME in ('goodbad') then BLOCK = 'B1 ' 
     else if BLOCK_NAME in ('bushkerry') then BLOCK = 'B2'
     else if BLOCK_NAME in ('bushgoodpractice') then BLOCK = 'B3'
     else if BLOCK_NAME in ('bushgoodcritical') then BLOCK = 'B4'
     else if BLOCK_NAME in ('kerrybush') then BLOCK = 'B5'
     else if BLOCK_NAME in ('bushbadpractice') then BLOCK = 'B6'
     else if BLOCK_NAME in ('bushbadcritical') then BLOCK = 'B7'

Note: the IAT score will be based on B3, B4, B6, B7 only.  If other blocks are included as in 
this example, the macro will calculate basic statistics: mean, error rate, number, fast responses.

(5) In your SAS program, enter the following statement 

%iatCalc(libIn, libOut, INDATA, OUTDATA, BLOCNAME, SESHID, TRIAL_LATENCY, TRIAL_ERROR, VERROR, VEXTREME, VSTD)

(6) the variable names between the parentheses are placeholders, you will change those values
    to ones that correspond with the file and variable names in your own datafile, and the last
    three (VERROR, VEXTREME, VSTD) will be replaced with numerical values (1 or 2) according to 
    the definitions described below

EXAMPLE (without semicolons):
%iatCalc(web, outdata, iatrace, CLEANiat, BLOCK, SUB, LATENCY, ERROR, 1, 2,1)

(7) run the script
(8) examine datafile to find your calculated IAT scores (in the example, the file would be called
    outdata.CLEANiat)
(9) the macro does not remove individual participants, it only identifies IAT scores that are
    clearly problematic.  The SUBEXCL variable is 0 if the IAT performance passes basic standards,
    1 if more than 10% of trials in the four main blocks were <300ms, 2 if data from a main block
    was missing.  Individual subjects will need to be removed by the researcher, and any additional
    exclusion criteria will have to be defined and implemented by the researcher.  The macro is not 
    designed to make conceptual decisions about data inclusion except for the most conservative
    criteria.

Descriptions of what the macro expects for input, and what it will output are below.  

The macro expects the following types of libraries, datafiles, and data:

(a) variables identifying SAS library and filenames 
      libIn     = input SAS library name
      libOut    = output SAS library name
      INDATA    = filename of input SAS dataset in the input SAS library
      OUTDATA   = filename for the output SAS dataset in the output SAS library

(b) variables identifying four key pieces of information for calculating an IAT score
      BLOCNAME  = variable name for block identifier in the indata file: alphanumeric indication of the 
                  four trial blocks ('B3', 'B4', 'B6', 'B7' are critical blocks corresponding 
                  to B3, B4, B6, and B7 from GNB, 2002).  At present the macro requires that the variable 
                  passed here uses the names 'B3', 'B4', 'B6', 'B7' to refer to the B3-B7 blocks
      SESHID    = variable name for unique subject identifier in the indata file
      TRIAL_LATENCY  = variable name for latency of response for trial in the indata file
      TRIAL_ERROR  = variable name for error coding: 0 if initial response was correct, 1 if initial response 
                  was incorrect in the indata file

(c) three options for variations in the D algorithm
      VERROR    = value: if '1' the algorithm will use error trial latencies, if '2' the algorithm will replace 
                  error trial latencies with blockmean+600 [blockmean is mean of correct responses only]
                  (1 is current standard for designs that require error correction, 2 if error correction is
                  not required)
      VEXTREME  = value: if '1' the algorithm provides no treatment of extreme values, if '2' the algorithm
                  will recode trial latencies <300ms to 300ms (2 is current standard).  This is the BIAT
                  modification made in this script; the unmodified macro text described deleting
                  trials <400ms.  In this version latencies >3000ms are always recoded to 3000ms.
      VSTD      = value: if '1' the block standard deviation is performed including error trials (corrected or not),
                  if '2' the block standard deviation is performed on correct responses only (1 is standard)

Note: The D algorithm is not the definitive scoring method for the IAT.  Improvements will be 
identified with continuing research by the academic community.  This macro conservatively 
applies the best algorithms identified by Greenwald et al., 2003.  Further enhancements to that 
algorithm will need to be validated and applied separately from this script.  The script itself 
will evolve more slowly than innovations in scoring to ensure that the validity of its procedures 
is well documented prior to their standardization.

The macro will output the following variables to a new file identified by the OUTDATA variable,
if there is an existing file by the name used for OUTDATA, the macro will overwrite it:
      SESHID  = unique subject identifier
      SUBEXCL = 0 for inclusion data, 1 for excluded data, 2 for incomplete data
      MB3     = mean of trial latencies for B3
      MB4     = mean of trial latencies for B4
      MB6     = mean of trial latencies for B6
      MB7     = mean of trial latencies for B7
      CS1     = standard deviation for B3 and B6 trials combined (correct trials only)
      CS2     = standard deviation for B4 and B7 trials combined (correct trials only)
      AS1     = standard deviation for B3 and B6 trials combined (all trials)
      AS2     = standard deviation for B4 and B7 trials combined (all trials)
      EB3     = percent errors of trials for B3
      EB4     = percent errors of trials for B4
      EB6     = percent errors of trials for B6
      EB7     = percent errors of trials for B7
      NB3     = number of trials used for mean calculation for B3
      NB4     = number of trials used for mean calculation for B4
      NB6     = number of trials used for mean calculation for B6
      NB7     = number of trials used for mean calculation for B7
      FB3     = percent fast responses of trials for B3
      FB4     = percent fast responses of trials for B4
      FB6     = percent fast responses of trials for B6
      FB7     = percent fast responses of trials for B7
      DIFF1   = MB6 - MB3
      DIFF2   = MB7 - MB4
      IAT1    = DIFF1/AS1 (or DIFF1/CS1 when VSTD = 2)
      IAT2    = DIFF2/AS2 (or DIFF2/CS2 when VSTD = 2)
      IAT     = mean of IAT1 and IAT2

Note: The macro will also output M (mean), E (error), N (number) and F (fast) variables for all
other blocks included in the initial SAS datafile.  Those additional calculations will not affect 
the IAT score.  If you do not want values calculated for other blocks, remove them from the 
input datafile before invoking the script.
*/

/*
MACROS for NEW IMPLICIT ASSOCIATION TEST SCORING ALGORITHM (Greenwald, Nosek, & Banaji, 2003)

FOR STANDARD USE, DO NOT CHANGE THE MACRO ITSELF.  JUST RUN THE MACRO AND CALL IT BY FOLLOWING
THE INSTRUCTIONS ABOVE.
*/
/*
Sriram notes on BIAT data cleaning: Oct 30, 2009
ignore first 2 trials of rest of blocks
truncate RTs at 300, 3000  (or if you like 300, 2000)
*/
/*
MODIFY MACRO TO CONFORM TO SRIRAM RECOMMENDATIONS FOR BIAT, I.E., TRUNCATE RTS TO 300 AND 3000.
*/
/* iatCalc: user-facing entry point for IAT scoring. It takes eleven positional arguments and
   passes each one on, by name, to %iatAlgorithm, which does all of the work. The four
   operator macro variables set first are not referenced anywhere in this macro. */
%macro iatCalc(libIn, libOut, indata, outdata, BLOCNAME, SESHID, TRIAL_LATENCY, TRIAL_ERROR, VERROR, VEXTREME, VSTD);
   %let divide= /;
   %let multiply= *;
   %let add = +;
   %let subtract = -;

   %iatAlgorithm(libIn=&libIn,
                 libOut=&libOut,
                 indata=&indata,
                 outdata=&outdata,
                 BLOCNAME=&BLOCNAME,
                 SESHID=&SESHID,
                 TRIAL_LATENCY=&TRIAL_LATENCY,
                 TRIAL_ERROR=&TRIAL_ERROR,
                 VERROR=&VERROR,
                 VEXTREME=&VEXTREME,
                 VSTD=&VSTD);
   run;
%mend iatCalc;

%macro iatAlgorithm(libIn, libOut, indata, outdata, BLOCNAME, SESHID, TRIAL_LATENCY, TRIAL_ERROR, VERROR, VEXTREME, VSTD);
/*-----------------------------------------------------------------------------------------------
  iatAlgorithm: score trial-level IAT/BIAT data into one summary row per value of SESHID.

  Parameters
    libIn, indata    library and dataset holding the trial-level input (one row per trial)
    libOut, outdata  library and dataset that receive the summary (replaced if present)
    BLOCNAME         name of the block variable; B3, B4, B6 and B7 are the scored blocks
    SESHID           name of the session / subject identifier variable
    TRIAL_LATENCY    name of the response latency variable
    TRIAL_ERROR      name of the error variable (0 = correct, 1 = error)
    VERROR           2 = block means use correct trials only and get the 600ms error penalty;
                     any other value = error-trial latencies are used as recorded
    VEXTREME         2 = latencies below 300ms are raised to 300ms; any other value = untouched
    VSTD             2 = differences are divided by the correct-trials SDs (CS1, CS2);
                     any other value = divided by the all-trials SDs (AS1, AS2)

  Output: libOut.outdata with M/N/E/F statistics per block, AS1 AS2 CS1 CS2, FASTM, SUBEXCL,
          DIFF1, DIFF2, IAT1, IAT2 and IAT.

  Side effects: creates or replaces the one-level datasets IAT, FASTDATA, MEANS, FASTMEAN,
          CORR, CORRMEAN, NMEAN, SD, CORRSTD1, CORRSTD2, ERR, ERRMEAN and COMBINE.

  Fixes relative to the previous version of this BIAT-modified macro:
    * the two truncation statements now assign to the caller-supplied latency variable
      instead of a variable literally named TRIAL_LATENCY, so truncation also works when
      the latency variable has a different name;
    * trials truncated at 3000ms are now coded FAST = 0 instead of being left missing, so
      they stay in the denominator of the fast-response proportions (F variables, FASTM)
      that drive SUBEXCL;
    * comments inside the macro are macro-style or block comments only.
-----------------------------------------------------------------------------------------------*/
data IAT; set &libIn..&indata; 
%*PRELIMINARY STEPS FOR HANDLING WEBDATA FORMATS;
    keep &BLOCNAME &SESHID &TRIAL_LATENCY &TRIAL_ERROR;
proc sort data=iat; by &SESHID &BLOCNAME;
    %*options nonotes; %*suppress all Notes to log;

data IAT; set IAT;

%*STEP 1 HAS BEEN REMOVED, NOW ALL DATA IS AT LEAST PARTIALLY ANALYZED;
%*STEP 1: Include data from B3, B4, B6, B7;
   %*if &BLOCNAME in ('B3', 'B4', 'B6', 'B7') then ;
   %*else delete;

%*STEP 2a: Recode trial latencies > 3,000ms PER SRIRAM;
   /* a truncated slow trial is by definition not a fast trial, so FAST is set to 0 here */
   if &TRIAL_LATENCY > 3000 then do;
      &TRIAL_LATENCY = 3000;
      FAST = 0;
   end;

%*STEP 2b: Eliminate subjects for whom more than 10% of trials have latencies < 300ms;
   else if &TRIAL_LATENCY < 0 then delete; %*for miscoded data in datafile indicating negative response times;
   else if -1 < &TRIAL_LATENCY < 300 then FAST = 1;
   else FAST = 0;

/* FASTDATA is written for inspection only; the means below are computed from IAT, which is
   still in SESHID/BLOCNAME order from the sort above */
data FASTDATA; set IAT; keep &SESHID &BLOCNAME FAST;
proc sort data=FASTDATA; by &SESHID &BLOCNAME;
proc means data=IAT noprint; by &SESHID &BLOCNAME; var FAST; output out=means mean=MEAN;
proc transpose data=means prefix=F name=name out=FASTMEAN; by &SESHID; id &BLOCNAME;
data FASTMEAN; set FASTMEAN; where name='MEAN'; FASTM = mean(FB3, FB4, FB6, FB7);
   if FASTM > .10 then SUBEXCL = 1; else SUBEXCL = 0; 
   %*SUBEXCL = 0 (include data), 1 (exclude data - too many fast responses), 2 (exclude data - missing data);
   %*The SUBEXCL variable needed to be reintroduced to the final dataset in STEP 12;

%*STEP 3: Use all trials;
   %*in the conventional algorithm the first two trials of each block would be dropped here;

%*STEP 4: No extreme value treatment <or> SRIRAM RECOMMENDED BIAT STEP TRUNCATE <300ms;
data IAT; set IAT; 
    %*if &VEXTREME = 1 then do nothing here;
    if &VEXTREME = 2 then do; if &TRIAL_LATENCY < 300 then &TRIAL_LATENCY = 300; end;

proc sort data=IAT; by &SESHID &BLOCNAME;

%*STEP 5: Compute mean of correct latencies for each block;
data CORR; set IAT;
   %*if &VERROR is 1 then means and SDs will be calculated for the entire set of latencies;
   if &VERROR = 2 then do; if &TRIAL_ERROR NE 0 then delete; end;
   keep &SESHID &BLOCNAME &TRIAL_LATENCY &TRIAL_ERROR;
proc means data=CORR noprint; by &SESHID &BLOCNAME; var &TRIAL_LATENCY; output out=means mean=MEAN;
proc transpose data=means prefix=M name=name out=CORRMEAN; by &SESHID; id &BLOCNAME;
data CORRMEAN; set CORRMEAN; where name='MEAN';

%*STEP 5x: Count number of trials used for each block mean calculation;
proc means data=CORR noprint; by &SESHID &BLOCNAME; var &TRIAL_LATENCY; output out=means n=N; 
proc transpose data=means prefix=N name=name out=NMEAN; by &SESHID; id &BLOCNAME;
data NMEAN; set NMEAN; where name='N';run; 
%*outputs a file with the number of actual trials in each block;

%*STEP 6a: Compute pooled SD for B3 & B6, and separately for B4 & B7 for correct trials only;
data SD; set CORR;
   if &TRIAL_ERROR NE 0 then delete;  %*drop error trials;
   if &BLOCNAME in ('B3', 'B6') then TD = '1';
   else if &BLOCNAME in ('B4', 'B7') then TD = '2';
   else delete;
   drop &BLOCNAME;
proc sort data=SD; by &SESHID TD;
proc means data=SD noprint; by &SESHID TD; var &TRIAL_LATENCY; output out=means std=STD;
proc transpose data=means prefix=CS name=name out=CORRSTD2; by &SESHID; id TD;
data CORRSTD2; set CORRSTD2; where name='STD';

%*STEP 6b: Compute pooled SD for B3 & B6, and separately for B4 & B7 including error trials;
data SD; merge IAT CORRMEAN; by &SESHID;
   if &TRIAL_ERROR < 0 then delete;
   else if &TRIAL_ERROR > 1 then delete; %*get rid of coding errors;
   else if &TRIAL_ERROR = 1 and &VERROR = 2 then do;
      if &BLOCNAME in ('B3') then &TRIAL_LATENCY = MB3 + 600;
      else if &BLOCNAME in ('B4') then &TRIAL_LATENCY = MB4 + 600;
      else if &BLOCNAME in ('B6') then &TRIAL_LATENCY = MB6 + 600;
      else if &BLOCNAME in ('B7') then &TRIAL_LATENCY = MB7 + 600;
   end;
   if &BLOCNAME in ('B3', 'B6') then TD = '1';
   else if &BLOCNAME in ('B4', 'B7') then TD = '2';
   else delete;
   drop &BLOCNAME;
proc sort data=SD; by &SESHID TD;
proc means data=SD noprint; by &SESHID TD; var &TRIAL_LATENCY; output out=means std=STD;
proc transpose data=means prefix=AS name=name out=CORRSTD1; by &SESHID; id TD;
data CORRSTD1; set CORRSTD1; where name='STD';

%*STEP 7: Replace error latencies with block mean + 600ms 
<or> use latency from stimulus onset to correct response (when correct response is required);
data ERR; set IAT;
   keep &SESHID &BLOCNAME &TRIAL_ERROR;
   if &TRIAL_ERROR < 0 then delete;
   else if &TRIAL_ERROR > 1 then delete; %*get rid of coding errors;
proc means data=ERR noprint; by &SESHID &BLOCNAME; var &TRIAL_ERROR; output out=means mean=MEAN;
proc transpose data=means prefix=E name=name out=ERRMEAN; by &SESHID; id &BLOCNAME;
data ERRMEAN; set ERRMEAN; where name='MEAN';

%*STEP 7 continued: combining data;
data COMBINE; merge CORRMEAN NMEAN CORRSTD1 CORRSTD2 ERRMEAN FASTMEAN; by &SESHID; %*combining datasets for calculating final means;
   if &VERROR=2 then do;
      /* NOTE(review): the MB: and EB: arrays are paired by position, which presumes every
         block has both a mean and an error rate - confirm for sparse data */
      array BLOCKMeans(*) MB: ;
      array BLOCKCstds(*) CS: ;
      array BLOCKAstds(*) AS: ;
      array BLOCKErrs(*) EB: ;
      do i=1 to dim(BLOCKMeans); %*for each of the four blocks replace error trials with mean + 600ms;
         BLOCKMeans{i} = (1-BLOCKErrs{i})*BLOCKMeans{i} + (BLOCKErrs{i})*(BLOCKMeans{i}+600);
      end;
   end;

%*STEP 8: No transformation of latencies;
    %*in the conventional algorithm, test latencies would be log transformed prior to the transposing in the current format;

%*STEP 9: Average latencies for each of the four blocks;
    %*this step was already accomplished in the do loop above;

%*STEP 10: Compute two differences B6-B3 and B7-B4 (does not account for pairing order);
   DIFF1 = MB6 - MB3;
   DIFF2 = MB7 - MB4;

%*STEP 11: Divide each difference by associated pooled SD from STEP 6a or 6b;
   If &VSTD = 2 then do;
      IAT1 = DIFF1/CS1;
      IAT2 = DIFF2/CS2;
   end;
   else do; %*IF VSTD = 1 also set as default;
      IAT1 = DIFF1/AS1;
      IAT2 = DIFF2/AS2;
   end; 

%*STEP 12: Average quotients from STEP 11;
   IAT = mean(IAT1, IAT2);

%*if there is missing data in critical blocks, mark data as excluded (SUBEXCL=2);
                               %* old way of doing it  do i=1 to dim(BLOCKMeans);
                               %*  if BLOCKMeans{i} = . then SUBEXCL = 2;
                               %*end;
   if MB3=. | MB4=. | MB6=. | MB7=. then SUBEXCL=2; %*if missing data from any of critical blocks then marked;

data &libout..&outdata (drop=i name);
    set COMBINE;
run;
%mend iatAlgorithm;
*END OF ALGORITHM;

/* Build the macro input: the B3, B4, B6 and B7 rows of raw, with the block label copied
   into a two-character BlockN variable, then score them into WORK.IatTemp2.
   Macro options used: VERROR=1, VEXTREME=2, VSTD=1. */
DATA IatTemp1;
   SET raw;
   LENGTH BlockN $ 2;
   IF Block IN ('B3','B4','B6','B7');   * retain only critical blocks;
   BlockN = Block;
RUN;

%iatCalc(WORK, WORK, IatTemp1, IatTemp2, BlockN, Session_ID, Trial_Latency, Trial_Error, 1,2,1);
RUN;

/* Give every macro output a BIAT-prefixed copy and drop the generic names. The per-block
   statistics are copied for suffixes 1 to 7; a suffix with no matching macro output
   simply yields a missing value. */
DATA IatTemp2;
   SET IatTemp2;

   /* overall scores and their ingredients */
   BIAT      = IAT;
   BIAT1     = IAT1;
   BIAT2     = IAT2;
   BIATexcl  = subexcl;
   BIATdiff1 = diff1;
   BIATdiff2 = diff2;
   BIATas1   = as1;
   BIATas2   = as2;
   BIATcs1   = cs1;
   BIATcs2   = cs2;

   /* per-block N, mean, error rate and fast-response rate */
   ARRAY _nbIn{7}  nb1-nb7;
   ARRAY _nbOut{7} BIATnb1-BIATnb7;
   ARRAY _mbIn{7}  mb1-mb7;
   ARRAY _mbOut{7} BIATmb1-BIATmb7;
   ARRAY _ebIn{7}  eb1-eb7;
   ARRAY _ebOut{7} BIATeb1-BIATeb7;
   ARRAY _fbIn{7}  fb1-fb7;
   ARRAY _fbOut{7} BIATfb1-BIATfb7;
   DO _k = 1 TO 7;
      _nbOut{_k} = _nbIn{_k};
      _mbOut{_k} = _mbIn{_k};
      _ebOut{_k} = _ebIn{_k};
      _fbOut{_k} = _fbIn{_k};
   END;

   BIATfastm = fastm;

   DROP IAT IAT1 IAT2 subexcl
        nb1-nb7 mb1-mb7 _LABEL_
        eb1-eb7 fb1-fb7
        fastm diff1 diff2 as1 as2 cs1 cs2
        _k;
RUN;

/* Distribution of Trial_Number within blocks B4 and B7 (listing pasted below) */
PROC FREQ DATA=raw;
   WHERE block IN ('B4','B7');
   TABLES trial_number;
RUN;
/*
                                                                        Cumulative    Cumulative
                               Trial_Number    Frequency     Percent     Frequency      Percent
                               -----------------------------------------------------------------
                                          0        1032        4.17          1032         4.17
                                          1        1032        4.17          2064         8.33
                                          2        1032        4.17          3096        12.50
                                          3        1032        4.17          4128        16.67
                                          4        1032        4.17          5160        20.83
                                          5        1032        4.17          6192        25.00
                                          6        1032        4.17          7224        29.17
                                          7        1032        4.17          8256        33.33
                                          8        1032        4.17          9288        37.50
                                          9        1032        4.17         10320        41.67
                                         10        1032        4.17         11352        45.83
                                         11        1032        4.17         12384        50.00
                                         12        1032        4.17         13416        54.17
                                         13        1032        4.17         14448        58.33
                                         14        1032        4.17         15480        62.50
                                         15        1032        4.17         16512        66.67
                                         16        1032        4.17         17544        70.83
                                         17        1032        4.17         18576        75.00
                                         18        1032        4.17         19608        79.17
                                         19        1032        4.17         20640        83.33
                                         20        1032        4.17         21672        87.50
                                         21        1032        4.17         22704        91.67
                                         22        1032        4.17         23736        95.83
                                         23        1032        4.17         24768       100.00
*/

ODS HTML PATH=WebOut FILE="Prep.IAT.03.MacroAnalysis.htm";

/***	Step 3c - Getting other info from IAT datafile not calculated BY the SAS macro
		- These are specific to web datasets						   ***/
/* Input = the B3/B4/B6/B7 rows of Raw, limited to the variables used by the steps below */
DATA Input;
   SET Raw;
   WHERE Block IN ('B3','B4','B6','B7');
   KEEP Trial_Number Block Block_Pairing_Definition Session_ID Study_Name Task_NAME
        Trial_Error Trial_Latency;
RUN;

/* Sorted by session and block so that the BY-group processing below will work */
PROC SORT DATA=Input;
   BY Session_ID Block;
RUN;

/**********          **********          **********          **********          **********          **********/          
/* For purposes of record-keeping, perhaps to mention in a Method section, and to spot possible anomalies,
  identify how many critical trials were RECODED by the MACRO to 3000ms for being too slow (> 3000ms).
  Writes two HTML reports: the overall count of slow trials, and the count per Session_ID.
  Fix: the stray comment opener that used to follow the last ODS HTML CLOSE has been removed;
  it was only ever terminated by the end of whatever comment happened to come next. */
DATA criticalTrials;
   SET input;
   tooslow = (TRIAL_LATENCY > 3000);   /* 1 = latency above 3000ms, 0 otherwise */
RUN;

ODS HTML PATH=WebOut FILE="DESC.BIATtrialsGT3000ms.html";
TITLE 'BIAT Critical Trials > 3000ms';
PROC FREQ DATA=criticalTrials;
   TABLES tooslow;
RUN;
ODS HTML CLOSE;

/* Identify how many participants (and how many per participant) had such slow trials */
DATA GT3000;
   SET criticalTrials;
   IF tooslow = 1;
RUN;

ODS HTML PATH=WebOut FILE="DESC.BIATpeopleGT3000ms.html";
TITLE '# Ps with Critical Trials > 3000ms';
PROC FREQ DATA=GT3000;
   TABLES SESSION_ID;
RUN;
ODS HTML CLOSE;

/***	Identify study name. If you have data for only 1 study this may be redundant; if so, then 
	   block pairing Definition (or for new data, the Task_ID from the SessionTask data) will be
	   more useful 										   ***/
/* Cond: Session_ID with its Study_Name, read from the Trial_Number = 0 row of block B3 */
DATA Cond;
   SET Input (WHERE=(Trial_Number = 0 AND Block = 'B3'));
   KEEP Trial_Number Session_ID Block Study_Name;
RUN;

PROC TRANSPOSE DATA=Cond NAME=NAME OUT=SpdMeans;
   BY Session_ID;
   VAR Study_Name;
RUN;

/* only the first transposed column is carried forward as Study_Name */
DATA Cond;
   SET SpdMeans (RENAME=(COL1=Study_Name));
   KEEP Study_Name Session_ID;
RUN;

PROC SORT DATA=Cond;
   BY Session_ID;
RUN;

/***	Identify Block Pairing definitions so that the data doesn't get screwed up 		   ***/
/* Pairing: the Block_Pairing_Definition found on the Trial_Number = 0 row of block B3,
   transposed into a P-prefixed column named after the block (PB3) */
DATA Pairing;
   SET Input (WHERE=(Trial_Number = 0 AND Block = 'B3'));
   KEEP Session_ID Block Block_Pairing_Definition;
RUN;

PROC TRANSPOSE DATA=Pairing PREFIX=P NAME=NAME OUT=Pairing(DROP=Name);
   BY Session_ID;
   ID Block;
   VAR Block_Pairing_Definition;
RUN;

/***	Outputs a file with Session_ID and the Block_Pairing_defs 				   ***/
PROC SORT DATA=Pairing;
   BY Session_ID;
RUN;

/***	N per Block - including extreme outliers; one N-prefixed column per block; SET=ns	   ***/
DATA Ns;
   SET Input (KEEP=Session_ID Block Trial_Number);
RUN;

/* count the trials in each session-by-block cell */
PROC MEANS DATA=Ns NOPRINT;
   BY Session_ID Block;
   VAR Trial_Number;
   OUTPUT OUT=MEANS N=N;
RUN;

PROC TRANSPOSE DATA=MEANS PREFIX=N NAME=NAME OUT=SpdMeans;
   BY Session_ID;
   ID Block;
RUN;

PROC SORT DATA=SpdMeans;
   BY Session_ID;
RUN;

/***	Outputs a file with the number of actual trials in each Block 				   ***/
/* keep only the transposed row that holds the N statistic */
DATA Ns;
   SET SpdMeans (WHERE=(NAME='N'));
RUN;

ODS HTML CLOSE;

/* Trial counts per block pairing in Input (listing pasted below) */
PROC FREQ DATA=input;
   TABLES block_pairing_definition;
RUN;
/*
                                                                                    Cumulative    Cumulative
                    Block_Pairing_Definition               Frequency     Percent     Frequency      Percent
                    ----------------------------------------------------------------------------------------
                    Physically Ill People/Bad,Mentally        29820       49.89         29820        49.89
                    Physically Ill People/Good,Mentally       29952       50.11         59772       100.00

*/
/* Compute INTERNAL CONSISTENCY using alternating couplets of trials, rather than
   traditional "practice" vs. critical blocks.
   Trial numbers are shifted up by 1, and by a further 12 in blocks B4 and B7. */
DATA couplets;
   SET input;
   trial_number = trial_number + 1 + 12 * (block IN ('B4','B7'));
RUN;

PROC FREQ DATA=couplets;
   TABLES trial_number;
RUN;

/* Split-half scoring over all trials. Couplets are defined by MOD(trial_number,4):
   remainders 0,1 form one half and remainders 2,3 the other. The pairing decides which
   side of the difference a trial is on (Good pairing -> B3/B4, Bad pairing -> B6/B7),
   so IAT1 comes from the 0,1 couplets and IAT2 from the 2,3 couplets. */
DATA coupletscombined;
   SET couplets;
   SELECT (block_pairing_definition);
      WHEN ('Physically Ill People/Good,Mentally') DO;
         IF MOD(trial_number,4) IN (0,1) THEN blockn = 'B3';
         ELSE IF MOD(trial_number,4) IN (2,3) THEN blockn = 'B4';
         ELSE DELETE;
      END;
      WHEN ('Physically Ill People/Bad,Mentally') DO;
         IF MOD(trial_number,4) IN (0,1) THEN blockn = 'B6';
         ELSE IF MOD(trial_number,4) IN (2,3) THEN blockn = 'B7';
         ELSE DELETE;
      END;
      OTHERWISE DELETE;
   END;
RUN;

%iatCalc(WORK, WORK, coupletscombined, IatTemp4, blockn, session_id, trial_latency, trial_error,  1, 2, 1);

/* keep just the two half scores, renamed BIATy and BIATz */
DATA IatTemp4;
   SET IatTemp4 (KEEP=session_id IAT1 IAT2 RENAME=(IAT1=BIATy IAT2=BIATz));
RUN;

PROC MEANS DATA=IatTemp4;
   VAR BIATy BIATz;
RUN;

/* Blocks 3 and 6 only: the same couplet split, limited to rows with trial_number below 13 */
DATA couplets1;
   SET couplets (WHERE=(trial_number LT 13));
   SELECT (block_pairing_definition);
      WHEN ('Physically Ill People/Good,Mentally') DO;
         IF MOD(trial_number,4) IN (0,1) THEN blockn = 'B3';
         ELSE IF MOD(trial_number,4) IN (2,3) THEN blockn = 'B4';
         ELSE DELETE;
      END;
      WHEN ('Physically Ill People/Bad,Mentally') DO;
         IF MOD(trial_number,4) IN (0,1) THEN blockn = 'B6';
         ELSE IF MOD(trial_number,4) IN (2,3) THEN blockn = 'B7';
         ELSE DELETE;
      END;
      OTHERWISE DELETE;
   END;
RUN;

%iatCalc(WORK, WORK, couplets1, IatTemp5, blockn, session_id, trial_latency, trial_error,  1, 2, 1);

/* keep just the two half scores, renamed BIATy1 and BIATz1 */
DATA IatTemp5;
   SET IatTemp5 (KEEP=session_id IAT1 IAT2 RENAME=(IAT1=BIATy1 IAT2=BIATz1));
RUN;

PROC MEANS DATA=IatTemp5;
   VAR BIATy1 BIATz1;
RUN;

/* Blocks 4 and 7 only: the same couplet split, limited to rows with trial_number above 12 */
DATA couplets2;
   SET couplets (WHERE=(trial_number GT 12));
   SELECT (block_pairing_definition);
      WHEN ('Physically Ill People/Good,Mentally') DO;
         IF MOD(trial_number,4) IN (0,1) THEN blockn = 'B3';
         ELSE IF MOD(trial_number,4) IN (2,3) THEN blockn = 'B4';
         ELSE DELETE;
      END;
      WHEN ('Physically Ill People/Bad,Mentally') DO;
         IF MOD(trial_number,4) IN (0,1) THEN blockn = 'B6';
         ELSE IF MOD(trial_number,4) IN (2,3) THEN blockn = 'B7';
         ELSE DELETE;
      END;
      OTHERWISE DELETE;
   END;
RUN;

%iatCalc(WORK, WORK, couplets2, IatTemp6, blockn, session_id, trial_latency, trial_error,  1, 2, 1);

/* keep just the two half scores, renamed BIATy2 and BIATz2 */
DATA IatTemp6;
   SET IatTemp6 (KEEP=session_id IAT1 IAT2 RENAME=(IAT1=BIATy2 IAT2=BIATz2));
RUN;

PROC MEANS DATA=IatTemp6;
   VAR BIATy2 BIATz2;
RUN;
/***	Step 5	- Merge IAT data, this is analyzed, but still contains some errant coding	   ***/

/* HTML report: which trial numbers ended up in each couplet "block", for all three
   split-half datasets. Each dataset is sorted in place by blockn first. */
ODS HTML PATH=WebOut FILE="DESC.BIAT.trialsAlternatingCouplets.html";
TITLE 'Trials by block for Alternating Couplet split-halves';

TITLE2 'Combined blocks 3,4,6,7';
PROC SORT DATA=coupletscombined;
   BY blockn;
RUN;
PROC FREQ DATA=coupletscombined;
   BY blockn;
   TABLES trial_number;
RUN;

TITLE2 'Blocks 3 and 6 only';
PROC SORT DATA=couplets1;
   BY blockn;
RUN;
PROC FREQ DATA=couplets1;
   BY blockn;
   TABLES trial_number;
RUN;

TITLE2 'Blocks 4 and 7 only';
PROC SORT DATA=couplets2;
   BY blockn;
RUN;
PROC FREQ DATA=couplets2;
   BY blockn;
   TABLES trial_number;
RUN;

ODS HTML CLOSE;

ODS HTML PATH=WebOut FILE="Prep.IAT.05.Merging.Scores.Unreversed.htm";

/* Document the structure of every dataset that goes into the merge */
PROC CONTENTS DATA=Cond;     RUN;
PROC CONTENTS DATA=Pairing;  RUN;
PROC CONTENTS DATA=Ns;       RUN;
PROC CONTENTS DATA=IatTemp2; RUN;
PROC CONTENTS DATA=IatTemp4; RUN;
PROC CONTENTS DATA=IatTemp5; RUN;
PROC CONTENTS DATA=IatTemp6; RUN;

/* Put every input in Session_ID order, as required by the MERGE ... BY that follows */
PROC SORT DATA=Cond;     BY Session_ID; RUN;
PROC SORT DATA=Pairing;  BY Session_ID; RUN;
PROC SORT DATA=Ns;       BY Session_ID; RUN;
PROC SORT DATA=IatTemp2; BY Session_ID; RUN;
PROC SORT DATA=IatTemp4; BY Session_ID; RUN;
PROC SORT DATA=IatTemp5; BY Session_ID; RUN;
PROC SORT DATA=IatTemp6; BY Session_ID; RUN;


/***	MERGE IAT DATA FROM ALL PARTS ABOVE							   ***/
/* One row per Session_ID: study name, B3 pairing, per-block trial counts, the BIAT scores
   and the three sets of split-half scores */
DATA clean.IatMerged;
   MERGE Cond
         Pairing
         Ns
         IatTemp2
         IatTemp4
         IatTemp5
         IatTemp6;
   BY Session_ID;
RUN;

TITLE2 'numeric variables for clean.IatMerged';

/* Session_ID count and range for each pairing definition recorded in Pb3 */
PROC MEANS DATA=clean.IatMerged MAXDEC=0;
   CLASS Pb3;
   VAR Session_ID;
RUN;

/* Summary of all numeric data so far */
PROC MEANS DATA=clean.IatMerged;
RUN;
/***NOTE:
	IAT data not completely cleaned (bad subjects, weird data, etc), but it is transposed,
	and IAT scores have been calculated							   ***/

/***	Step 6	- Identify and remove errant trials/participants in transposed, but non-reverse-coded 
		    IAT data									   ***/
ODS HTML CLOSE;

ODS HTML PATH=WebOut FILE="Prep.IAT.06.Errant.Cleanup.htm";

/***	Step 6a	- Slow trials and participants.
		Counts how many critical-block trials exceed the slow-trial cutoff
		(>10000ms for IAT; >3000 for BIAT per SRIRAM; the BIAT value is used here).
		Normally this is only a small percentage of all trials; if it is not, look
		into it further. Likewise no single participant should show more than a few
		slow trials in the second FREQ; such participants would need to be dropped. ***/
DATA CriticalTrials;
  SET Raw;
  /* Keep only rows from blocks B3, B4, B6 and B7. */
  IF Block IN ('B3','B4','B6','B7');
  /* 1 when the latency exceeds 3000, otherwise 0 (a missing latency yields 0). */
  TooSlow = (Trial_Latency > 3000);
RUN;

/* NOTE(review): the title mentions only the "2nd Critical Block", but the data
   step above keeps all four blocks B3/B4/B6/B7 - confirm the intended wording. */
TITLE2 '2nd Critical Block TRIALS > 3000ms' ;
PROC FREQ DATA=CriticalTrials;
  TABLES TooSlow;
RUN;

/***	Identify how many participants had such slow trials.					   ***/
TITLE2 'SeshIDs with TooSlow';
PROC FREQ DATA=CriticalTrials;
  WHERE TooSlow=1;
  TABLES Session_ID;
RUN;

/***	Calculating QuitAfter variable, to correspond to PCIAS analysis, coding when participants
	dropped out of IAT.
	DISABLED: the DATA step below is commented out and does not run.			   ***/
/*
DATA clean.IatMerged; SET clean.IatMerged;
 FORMAT QuitAfter 8.;
 IF SubExcl NE 0 THEN DO;
	IF 		       Nb1 NE . & NMiss(of Nb2-Nb7) = 6	THEN QuitAfter = 1;
	  ELSE IF NMiss(of Nb1-Nb2) = 0 & NMiss(of nb3-nb7) = 5 THEN QuitAfter = 2;
	  ELSE IF NMISS(of Nb1-Nb3) = 0 & NMISS(of nb4-nb7) = 4 THEN QuitAfter = 3;
	  ELSE IF NMISS(of Nb1-Nb4) = 0 & NMISS(of nb5-nb7) = 3 THEN QuitAfter = 4;
	  ELSE IF NMISS(of Nb1-Nb5) = 0 & NMISS(nb6,   nb7) = 2 THEN QuitAfter = 5;
	  ELSE IF NMISS(of Nb1-Nb6) = 0 & 	       nb7 =  .	THEN QuitAfter = 6;
	ELSE;
  END;
run;
*/
/***	Step 6b	- Descriptives of different variables to identify errant coding in IAT or data
		     that should be excluded							   ***/

TITLE2 'DATA conforming to standard rules';
/* Mean/min/max of every numeric variable, classified by BIATexcl, restricted to
   sessions whose Nb3, Nb4, Nb6 and Nb7 equal 12, 24, 12 and 24 respectively. */
PROC MEANS DATA=clean.IatMerged MEAN MIN MAX;
  WHERE Nb3 = 12 AND Nb4 = 24 AND Nb6 = 12 AND Nb7 = 24;
  CLASS BIATexcl;
RUN;

/***	Identifying Block_Pairing_definitions for cleaning.
NOTE:	If you have very long pairing definitions you'll have to increase the $48. length in the 
     	FORMAT statement, but this length lets them print on 1 line				   ***/

/***OPTIONAL
     __________Outputting multiple PROCs to a page______________________
	*** Setting formdlim to a space causes SAS to fill each page before going to the next one ; 
	Options formdlim=' ' ;		
	*** Resetting the value of formdlim to the default,
  		so that each new Proc will start on a new page ;
	Options formdlim='' ;									   ***/
/* DISABLED: IAT descriptives by block pairing definition (commented out).
TITLE2 'Pb3';PROC MEANS MEAN MIN MAX;CLASS Pb3;VAR IAT;FORMAT Pb3 $48.;RUN;  
TITLE2 'Pb4';PROC MEANS MEAN MIN MAX;CLASS Pb4;VAR IAT;FORMAT Pb4 $48.;RUN;  
TITLE2 'Pb6';PROC MEANS MEAN MIN MAX;CLASS Pb6;VAR IAT;FORMAT Pb6 $48.;RUN;  
TITLE2 'Pb7';PROC MEANS MEAN MIN MAX;CLASS Pb7;VAR IAT;FORMAT Pb7 $48.;RUN;  
*/

/***	Checking for errant data recording							   ***/
/* DISABLED: IAT descriptives by number of trials per block (commented out).
TITLE2 'Nb3';PROC MEANS MEAN MIN MAX;CLASS Nb3;VAR IAT;RUN;  
TITLE2 'Nb4';PROC MEANS MEAN MIN MAX;CLASS Nb4;VAR IAT;RUN;
TITLE2 'Nb6';PROC MEANS MEAN MIN MAX;CLASS Nb6;VAR IAT;RUN;
TITLE2 'Nb7';PROC MEANS MEAN MIN MAX;CLASS Nb7;VAR IAT;RUN;
Options formdlim='' ;
*/

/***	Step 6c	- Errant data clean-up: Removing problem data identified in 6a&b
		- Dropping tasks that should not be counted as sessions because it is test data or 
		    has clear data transfer/storage problems (i.e., other IAT pairings, as in 
		    block pairing definitions in Pb3-Pb7, or missing almost all trials, as when
		    Nb1-Nb7 are < 8)								   ***/
/* DISABLED: the Step 6c clean-up DATA step is commented out (opening statement).
DATA clean.IatMerged;SET clean.IatMerged;
*/
/***NOTE:
	If your primary interest is the explicit data, you might add "" [blank] to these IN
	   statements to retain explicit data that's missing valid IAT data (e.g., if they dropped
	   out of the study before they finished the IAT).
	As this is written, it drops all participants who didn't have a "valid" IAT block pairing
	   definition. It's necessary if you're cleaning old Demonstration-site data, as it's hard
	   to identify phantom sessions in a long-running study. If you have a research-site study
	   in which you're primarily interested in the explicit data, then you don't care whether
	   they have valid IAT data.								   ***/

***	testing data or data error;
  /* IF Nb3 < 10 | Nb4 < 10 | Nb6 < 10 | Nb7 < 10 | Nb1 < 10 | Nb2 < 10 | Nb5 < 10 THEN DELETE; */
/* DISABLED: remainder of the Step 6c clean-up step (commented out).
RUN;
PROC MEANS MEAN MIN MAX;CLASS SubExcl;RUN;  ***	summary of all data so far;
*/

/***	Step 7	- Reverse-code and run initial checks of cleaned, reverse-coded IAT scores 
NOTE: 		- Half of these trials must be reverse-scored. Because order of stereotype-congruent
		     /stereotype-incongruent pairings is counterbalanced, B3/B4 and B6/B7 are
		     opposite pairings. Until this is done, the IAT score will be "washed out"
		     because of the counterbalancing.
		     Use the pairings listed in PB3 to reverse-code the IATS below		   ***/
ODS HTML CLOSE;

/* Builds clean.IatReversed from clean.IatMerged:
     1. sign-flips the BIAT scores for one counterbalancing order (identified by Pb3),
     2. derives combined-block summary rates and candidate exclusion flags,
     3. derives the overall disqualification flag BIATdq,
     4. sorts the result by Session_ID.
   Nothing is deleted here; the flags only mark candidate exclusions.
   Comments inside the DATA steps use the block form because the statement form
   (* ... ;) must not contain unmatched quotation marks. */
DATA clean.IatReversed; SET clean.IatMerged;
   /* Reverse code IAT scores for counterbalancing conditions. Include pairing definition
      for stereotype-incongruent counterbalanced order. */
   IF Pb3 IN ("Physically Ill People/Bad,Mentally") THEN DO; 
	BIAT=0-BIAT; BIAT1=0-BIAT1; BIAT2=0-BIAT2; BIATDIFF1=0-BIATDIFF1; BIATDIFF2=0-BIATDIFF2;
   END;
/***	SubExcl indicates various exclusion categories.
  Additional criteria below are adapted from PCIAS analysis and informed by empirical review 
    of these particular BIAT error rates (notably, excluding practice blocks 3 and 6 since these
    only have 12 trials each).
    ***/
   /* Calculate mean errors for combined blocks: averages of the block 4 and block 7
      values, weighted by BIATnb4 and BIATnb7. The result is missing when any input is
      missing or when BIATnb4+BIATnb7 is zero. */
   BIATerrmean = (BIATeb4*BIATnb4 + BIATeb7*BIATnb7) / (BIATnb4+BIATnb7);
   BIATfastmean = (BIATfb4*BIATnb4 + BIATfb7*BIATnb7) / (BIATnb4+BIATnb7);
   /* Identify possible exclusion criteria, but do not delete yet.
      BIATexclError: 1 = combined value above .30; 2 = either block above .40; else 0. */
   BIATexclError = 0;
      if BIATerrmean > .30 then BIATexclError = 1;
     ELSE IF BIATeb4 > .40 | BIATeb7 > .40 then BIATexclError = 2;
   /* BIATexclFast: 1 = BIATfb4 or BIATfb7 above .25; else 0. */
   BIATexclFast = 0;
    if BIATfb4 > .25 | BIATfb7 > .25 then BIATexclFast = 1;
RUN;

/* BIATdq = 1 when any of the three exclusion indicators is non-zero. A missing
   BIATexcl also counts as non-zero, so such rows are flagged as disqualified. */
DATA Clean.IatReversed;SET clean.IatReversed;
 BIATdq = 0;
 IF BIATexcl NE 0 OR BIATexclError NE 0 OR BIATexclFast NE 0 THEN BIATdq = 1;
RUN;

/* Order by Session_ID for the later match-merge with the other cleaned datasets. */
PROC SORT DATA=Clean.IatReversed; BY Session_ID;
RUN;

/***NOTE:
	IAT data is now transposed, calculated, and sorted for merging with other data		   ***/

/***********           ************          ************          ************           *************
MD-Merging Data -- Cleaning Raw, Merged & Transposed Section 
************           ************          ************          ************           ************/

/***	Step 0	- Merge the transposed datafiles.
	NOTE: the (WHERE=(Consenter=1)) option that would eliminate ghost sessions (those
	   who were assigned but chose not to participate) is currently commented out, so
	   this step does not filter any sessions.						   ***/
PROC CONTENTS DATA=CLean.Demos;
RUN;

DATA Clean.Merged;  /* (WHERE=(Consenter=1)) */
  /***NOTE:
	Add any other SessionTask datasets that are needed to identify conditions or other
	requested information.								   ***/
  MERGE
    Clean.Order
    clean.iatcamiorder
    clean.TimeOnTasks
    Clean.IatReversed
    Clean.Explicit
    /* Not merged at present: Clean.timeontasks Clean.TimeOnTasks clean.ECond clean.ICond
       *** For newer data-Clean.ExpAssign can usually replace this			     */
    Clean.Demos
    /* Clean.Demos: for studies that used the PI-research site participant pool.
       If using this, then don't add Clean.Sessions					     */
  ;
  BY Session_ID;
RUN;

/* Distribution of the first critical block's pairing definition after merging. */
PROC FREQ DATA=clean.merged;
  TABLES pb3;
RUN;

/* Adds BIAT_MIgood1st from the Pb3 pairing definition (1, 0, or missing when Pb3
   matches neither value) and drops variables that are not carried forward. */
DATA Clean.Merged;
  SET Clean.Merged;

  IF Pb3 = "Physically Ill People/Bad,Mentally" THEN BIAT_MIgood1st = 1;
  ELSE IF Pb3 = "Physically Ill People/Good,Mentally" THEN BIAT_MIgood1st = 0;
  ELSE BIAT_MIgood1st = .;

  DROP name class engfluency ethnic income
       BIATnb1 BIATnb2 BIATnb5
       BIATmb1 BIATmb2 BIATmb5
       BIATeb1 BIATeb2 BIATeb5
       BIATfb1 BIATfb2 BIATfb5;
RUN;

/* CAMI scoring.
     camiyes1-camiyes10 : 1 when the matching cami item is not equal to ., else 0
     camiresponses      : number of items flagged as answered (0-10)
     camitotal, camimean: sum and mean of the ten items, set to missing unless all
                          ten items were answered */
DATA Clean.Merged;
  SET Clean.Merged;

  camiyes1  = (cami1  NE .);
  camiyes2  = (cami2  NE .);
  camiyes3  = (cami3  NE .);
  camiyes4  = (cami4  NE .);
  camiyes5  = (cami5  NE .);
  camiyes6  = (cami6  NE .);
  camiyes7  = (cami7  NE .);
  camiyes8  = (cami8  NE .);
  camiyes9  = (cami9  NE .);
  camiyes10 = (cami10 NE .);

  camiresponses = SUM(OF camiyes1-camiyes10);
/* Distribution of camiresponses pasted from an earlier run:

                               camiresponses    Frequency     Percent     Cumulative    Cumulative
                                                                           Frequency      Percent
                               ------------------------------------------------------------------
                                           0          51        9.01            51         9.01
                                           1           1        0.18            52         9.19
                                           7           1        0.18            53         9.36
                                           8           2        0.35            55         9.72
                                           9           5        0.88            60        10.60
                                          10         506       89.40           566       100.00
*/
  camitotal = SUM(cami1, cami2, cami3, cami4, cami5,
                  cami6, cami7, cami8, cami9, cami10);
  camimean  = MEAN(cami1, cami2, cami3, cami4, cami5,
                   cami6, cami7, cami8, cami9, cami10);

  /* Only 9 of the 515 Ps who answered ANY cami item answered fewer than all ten,
     so scores are kept for complete responders only. */
  IF camiresponses < 10 THEN DO;
    camitotal = .;
    camimean  = .;
  END;
RUN;

/* Create a "working" numeric feedback score from the stored feedback text.
     feedbackcorrect : -3..3 for the seven graded feedback messages, 8 for the
                       "too many errors" message, 9 for the "too many fast trials"
                       message, missing for any other text
     feedbackinvalid : 0 when feedbackcorrect is in -3..3, 1 when it is above 3,
                       missing otherwise
   The graded message literals end mid-word; they are compared exactly as written. */
DATA Clean.Merged;
  SET Clean.Merged;

  feedbackcorrect = .;
  SELECT (feedback);
    WHEN ('Your data suggest a strong implicit association of persons with mental illness with good (versus bad). **This is because you were much faster at the c')
      feedbackcorrect = -3;
    WHEN ('Your data suggest a moderate implicit association of persons with mental illness with good (versus bad). **This is because you were somewhat faster at')
      feedbackcorrect = -2;
    WHEN ('Your data suggest a slight implicit association of persons with mental illness with good (versus bad). **This is because you were slightly faster at t')
      feedbackcorrect = -1;
    WHEN ('Your data suggest no difference in your implicit association of persons with mental illness with good versus bad. **This is because your performance o')
      feedbackcorrect = 0;
    WHEN ('Your data suggest a slight implicit association of persons with mental illness with bad (versus good). **This is because you were slightly faster at t')
      feedbackcorrect = 1;
    WHEN ('Your data suggest a moderate implicit association of persons with mental illness with bad (versus good). **This is because you were somewhat faster at')
      feedbackcorrect = 2;
    WHEN ('Your data suggest a strong implicit association of persons with mental illness with bad (versus good). **This is because you were much faster at the c')
      feedbackcorrect = 3;
    WHEN ('There were too many errors made to determine a result.')
      feedbackcorrect = 8;
    WHEN ('There were too many fast trials to determine a result.')
      feedbackcorrect = 9;
    OTHERWISE;  /* unrecognised text: feedbackcorrect stays missing */
  END;

  feedbackinvalid = .;
  IF -3 <= feedbackcorrect <= 3 THEN feedbackinvalid = 0;
  ELSE IF feedbackcorrect > 3 THEN feedbackinvalid = 1;
RUN;

PROC SORT DATA=clean.merged;
  BY feedbackcorrect;
RUN;

/* Builds clean.completeus18: rows of clean.merged with age >= 18 and countrycit
   equal to 'us', plus two derived variables.
     tasksseen : taken from the first matching ORD* test below, checked from 9
                 downwards; 0 when none of the tests match
     edu5      : five-level reduction of edu (matches the PCIAS approach)
   The result is then sorted by IATbeforeCAMI. */
DATA clean.completeus18;
  SET clean.merged;
  IF age GE 18 AND countrycit IN ('us');

  IF orddebrief = 9 THEN tasksseen = 9;
  ELSE IF ORDpop_up_close = 8 THEN tasksseen = 8;
  ELSE IF ORDcami = 7 OR ORDIATfeedback = 7 THEN tasksseen = 7;
  ELSE IF ORDiat = 6 OR ORDIATfeedback = 6 THEN tasksseen = 6;
  ELSE IF ORDiatinstruct = 5 OR ORDiat = 5 THEN tasksseen = 5;
  ELSE IF ORDiatinstruct = 4 OR ORDcami = 4 THEN tasksseen = 4;
  ELSE IF ORDimc = 3 THEN tasksseen = 3;
  ELSE IF ORDpost_consent = 2 THEN tasksseen = 2;
  ELSE IF ORDconsent = 1 THEN tasksseen = 1;
  ELSE tasksseen = 0;

  IF 1 <= edu <= 3 THEN edu5 = 1;        /* less than HS grad */
  ELSE IF edu = 4 THEN edu5 = 2;         /* HS grad */
  ELSE IF 5 <= edu <= 6 THEN edu5 = 3;   /* some college or AA degree */
  ELSE IF 7 <= edu <= 8 THEN edu5 = 4;   /* BA or some grad */
  ELSE IF edu >= 9 THEN edu5 = 5;        /* grad degree */
  ELSE edu5 = .;
RUN;

PROC SORT DATA=clean.completeus18;
  BY IATbeforeCAMI;
RUN;
/*
   edu response codes, copied from the option values of the survey's HTML form:
        1 = elementary school
        2 = junior high
        3 = some high school
        4 = high school graduate
        5 = some college
        6 = associate's degree
        7 = bachelor's degree
        8 = some graduate school
        9 = master's degree
       14 = M.B.A.
       10 = J.D.
       11 = M.D.
       12 = Ph.D.
       13 = other advanced degree
*/
/* Frequencies of tasksseen for progressively narrower subsets of
   clean.completeus18, then by IATbeforeCAMI for those with tasksseen >= 4. */
ODS HTML PATH=WebOut FILE="DESC.tasksseen.BY.IATbeforeCAMI.htm";
TITLE 'Stigma3 DATA';

TITLE2 'Overall';
PROC FREQ DATA=clean.completeus18;
  TABLES tasksseen;
RUN;

TITLE2 'Where consent was seen';
PROC FREQ DATA=clean.completeus18;
  WHERE tasksseen >= 1;
  TABLES tasksseen;
RUN;

TITLE2 'Where consent given';
PROC FREQ DATA=clean.completeus18;
  WHERE tasksseen >= 2;
  TABLES tasksseen;
RUN;

TITLE2 'Where exposed to first substantive task, i.e., WHERE tasksseen GE 4';
PROC FREQ DATA=clean.completeus18;
  WHERE tasksseen >= 4;
  TABLES tasksseen;
RUN;

TITLE2 'BY IATbeforeCAMI';
PROC SORT DATA=clean.completeus18;
  BY IATbeforeCAMI;
RUN;
PROC FREQ DATA=clean.completeus18;
  WHERE tasksseen >= 4;
  BY IATbeforeCAMI;
  TABLES tasksseen;
RUN;

ODS HTML CLOSE;

/* consented = 1 when tasksseen is 2 or more, otherwise 0. */
DATA clean.completeus18;
  SET clean.completeus18;
  consented = (tasksseen >= 2);
RUN;

PROC FREQ DATA=clean.completeus18;
  TABLES tasksseen consented;
RUN;

/* Demographic frequencies and the age distribution, first for all of
   clean.completeus18 and then for rows with tasksseen >= 2. */
ODS HTML PATH=WebOut FILE="DESC.demographics.htm";

TITLE 'Overall completeus18';
PROC FREQ DATA=clean.completeus18;
  TABLES age sex ethnicityomb raceomb edu5 politicalid relid CountryCit;
RUN;
PROC UNIVARIATE DATA=clean.completeus18 PLOT;
  VAR age;
RUN;

TITLE 'Consenters completeus18';
PROC FREQ DATA=clean.completeus18;
  WHERE tasksseen >= 2;
  TABLES age sex ethnicityomb raceomb edu5 politicalid relid CountryCit;
RUN;
PROC UNIVARIATE DATA=clean.completeus18 PLOT;
  WHERE tasksseen >= 2;
  VAR age;
RUN;

ODS HTML CLOSE;

/* sawsubstance: 1 when tasksseen >= 4, 0 when tasksseen is 1 to 3, and missing
   otherwise (including tasksseen = 0). */
DATA clean.completeus18;
  SET clean.completeus18;
  SELECT;
    WHEN (1 <= tasksseen <= 3) sawsubstance = 0;
    WHEN (tasksseen >= 4)      sawsubstance = 1;
    OTHERWISE                  sawsubstance = .;
  END;
RUN;

PROC FREQ DATA=clean.completeus18;
  TABLES sawsubstance;
RUN;

/* Runs one PROC FREQ with a chi-square test per demographic variable, crossing
   &rowvar with age, sex, ethnicityomb, raceomb, edu5, politicalid and relid (in
   that order) on Clean.completeus18. */
%MACRO _stg3_chisq(rowvar);
  %LOCAL demog k col;
  %LET demog = age sex ethnicityomb raceomb edu5 politicalid relid;
  %LET k = 1;
  %LET col = %SCAN(&demog, &k);
  %DO %WHILE (%LENGTH(&col) > 0);
    PROC FREQ DATA = Clean.completeus18;
      TABLES &rowvar*&col/CHISQ;
    RUN;
    %LET k = %EVAL(&k + 1);
    %LET col = %SCAN(&demog, &k);
  %END;
%MEND _stg3_chisq;

ODS HTML PATH=WebOut FILE="DESC.consentBYdemographics.htm";
TITLE 'Overall completeus18';
%_stg3_chisq(consented)
ODS HTML CLOSE;

ODS HTML PATH=WebOut FILE="DESC.sawsubstanceBYdemographics.htm";
TITLE 'Overall completeus18';
%_stg3_chisq(sawsubstance)
ODS HTML CLOSE;


PROC FREQ DATA=clean.completeus18;
  TABLES IATBEFORECAMI;
RUN;

/* clean.consentedus18 keeps only the rows of clean.completeus18 with consented = 1. */
DATA clean.consentedus18;
  SET clean.completeus18;
  IF consented NE 1 THEN DELETE;
RUN;

/* Quick checks of the new dataset. */
PROC CONTENTS DATA=clean.consentedus18;
RUN;
PROC FREQ DATA=clean.consentedus18;
  TABLES feedbackcorrect;
RUN;
PROC MEANS DATA=clean.consentedus18;
  VAR camimean BIATy BIATy1 BIATy2;
RUN;

/* feedbackvalid   : 1 when feedbackcorrect is in -3..3, 0 when it is above 3,
                     missing otherwise
   validcamiandIAT : 1 when camimean and BIAT are both non-missing and
                     feedbackcorrect is in -3..3, otherwise 0 */
DATA clean.consentedus18;
  SET clean.consentedus18;

  IF feedbackcorrect > 3 THEN feedbackvalid = 0;
  ELSE IF -3 <= feedbackcorrect <= 3 THEN feedbackvalid = 1;
  ELSE feedbackvalid = .;

  validcamiandIAT = (camimean NE . AND BIAT NE . AND -3 <= feedbackcorrect <= 3);
RUN;

PROC FREQ DATA=clean.consentedus18;
  TABLES validcamiandIAT;
RUN;
PROC FREQ DATA=clean.consentedus18;
  TABLES feedbackvalid;
RUN;
PROC MEANS DATA=clean.consentedus18;
  WHERE feedbackvalid = 0;
  VAR BIAT;
RUN;
PROC MEANS DATA=clean.consentedus18;
  WHERE feedbackvalid = 1;
  VAR BIAT;
RUN;

/* camivalid    : 1 when camimean is not equal to ., otherwise 0
   camitotalrt  : sum of the ten cami*rt variables (SUM ignores missing values)
   camitotalrts : camitotalrt divided by 1000
                  (presumably milliseconds to seconds - confirm the units) */
DATA Clean.consentedus18;
  SET Clean.consentedus18;
  camivalid    = (camimean NE .);
  camitotalrt  = SUM(cami1rt, cami2rt, cami3rt, cami4rt, cami5rt,
                     cami6rt, cami7rt, cami8rt, cami9rt, cami10rt);
  camitotalrts = camitotalrt / 1000;
RUN;

PROC FREQ DATA=Clean.consentedus18;
  TABLES camivalid;
RUN;

/* BIATandCAMI: 1 when feedbackvalid = 1, camivalid = 1 and BIAT is non-missing;
   otherwise 0 when IATbeforeCAMI is non-missing; otherwise missing. */
DATA Clean.consentedus18;
  SET Clean.consentedus18;
  IF feedbackvalid = 1 AND camivalid = 1 AND BIAT NE . THEN BIATandCAMI = 1;
  ELSE IF IATbeforeCAMI NE . THEN BIATandCAMI = 0;
  ELSE BIATandCAMI = .;
RUN;

/* Compare the two "valid on both measures" flags. */
PROC FREQ DATA=Clean.consentedus18;
  TABLES BIATandCAMI validcamiandIAT;
RUN;

PROC MEANS DATA=clean.consentedus18;
  WHERE validcamiandIAT = 1;
  VAR BIAT;
RUN;
PROC MEANS DATA=clean.consentedus18;
  WHERE biatandcami = 1;
  VAR BIAT;
RUN;
PROC FREQ DATA=clean.consentedus18;
  TABLES validcamiandIAT;
RUN;
PROC FREQ DATA=clean.consentedus18;
  TABLES biatandcami;
RUN;

/* IMC coding.
     imccorrect : 1 when omdimc2rt = 0; otherwise 0 when omdimc2 = 1; else missing
                  NOTE(review): presumably a response time of 0 means the item was
                  left untouched, which counts as passing the check - confirm.
     imcfailed  : the reverse of imccorrect (missing stays missing)
     omdimc2rts : omdimc2rt divided by 1000
   The step now ends with an explicit RUN; so that it executes before the ODS HTML
   and TITLE global statements that follow, instead of relying on the next PROC
   statement to act as the step boundary. */
DATA Clean.consentedus18; SET Clean.consentedus18;
 imccorrect = .;
  IF omdimc2rt = 0 THEN imccorrect = 1;
  ELSE IF omdimc2 = 1 THEN imccorrect = 0;
 imcfailed = .;
  IF imccorrect = 1 THEN imcfailed = 0;
  ELSE IF imccorrect = 0 THEN imcfailed = 1;
  omdimc2rts = omdimc2rt/1000;
RUN;

ODS HTML PATH=WebOut FILE="DESC.IMC.htm";
TITLE 'Stigma3 consentedus18' ;
TITLE2 'Overall';
PROC FREQ DATA = clean.consentedus18; TABLES imccorrect imcfailed; RUN;
TITLE2 'WHERE biatandcami = 1';
PROC FREQ DATA = clean.consentedus18; TABLES imccorrect imcfailed; WHERE biatandcami = 1; RUN;
/* Sort in place so the BY imccorrect steps below can run. */
PROC SORT DATA = clean.consentedus18; BY imccorrect; RUN;
TITLE2 'IMC times for those who continued to next task';
TITLE3 'BY imccorrect';
PROC MEANS DATA = clean.consentedus18; VAR timeimc; BY imccorrect; RUN;
PROC UNIVARIATE DATA = clean.consentedus18 PLOT; VAR timeimc; BY imccorrect; RUN;
PROC FREQ DATA = clean.consentedus18; TABLES timeimc; BY imccorrect; RUN;
/* Setting TITLE2 again clears the TITLE3 set above. */
TITLE2 'Reaction times (BASED ON omdimc2rtfor those who were incorrect)';
PROC CORR DATA = clean.consentedus18; VAR omdimc2rts timeimc; 
 WHERE imccorrect = 0; RUN;
ODS HTML CLOSE;

/* Recoded demographic predictors.
     edu3              : 0 for edu 1-4, 1 for edu 5-7, 2 for edu 8 and above
     female            : 1 for sex 'f', 0 for sex 'm'
     black1white0      : 0 for RaceOMB 6, 1 for RaceOMB 5
     edu3code_1vs2     : 0 / -.5 / .5 for edu3 = 0 / 1 / 2
     edu3code_0vs1and2 : -1 for edu3 = 0, .5 for edu3 above 0
   Every variable is missing when none of its listed conditions applies. */
DATA Clean.consentedus18;
  SET Clean.consentedus18;

  IF 1 <= edu <= 4 THEN edu3 = 0;
  ELSE IF 5 <= edu <= 7 THEN edu3 = 1;
  ELSE IF edu >= 8 THEN edu3 = 2;
  ELSE edu3 = .;

  IF sex = 'f' THEN female = 1;
  ELSE IF sex = 'm' THEN female = 0;
  ELSE female = .;

  IF RaceOMB = 6 THEN black1white0 = 0;
  ELSE IF RaceOMB = 5 THEN black1white0 = 1;
  ELSE black1white0 = .;

  SELECT (edu3);
    WHEN (0)  edu3code_1vs2 = 0;
    WHEN (2)  edu3code_1vs2 = .5;
    WHEN (1)  edu3code_1vs2 = -.5;
    OTHERWISE edu3code_1vs2 = .;
  END;

  IF edu3 = 0 THEN edu3code_0vs1and2 = -1;
  ELSE IF edu3 > 0 THEN edu3code_0vs1and2 = .5;
  ELSE edu3code_0vs1and2 = .;
RUN;

PROC FREQ DATA=Clean.consentedus18;
  TABLES edu3 edu3code_1vs2 edu3code_0vs1and2;
RUN;
/****************************************************************************/
/* Centre the IVs at zero among rows with feedbackcorrect in -3..3.
   PROC STANDARD MEAN=0 writes the centred values to WORK.centered under the
   original names; the DATA step copies them to *c names, keeps only those plus
   session_id, and the result is merged back by session_id. Rows excluded by the
   WHERE are absent from centered, so their *c values are missing after the merge. */
PROC STANDARD DATA=Clean.consentedus18 MEAN=0 OUT=centered;
  WHERE feedbackcorrect GE -3 AND feedbackcorrect LE 3;
  VAR BIAT BIAT1 BIAT2 IATbeforeCAMI BIAT_MIgood1st feedbackcorrect timecami age
      female black1white0 edu3 edu3code_1vs2 edu3code_0vs1and2 IMCcorrect;
RUN;

DATA centered;
  SET centered;
  BIATc              = BIAT;
  BIAT1c             = BIAT1;
  BIAT2c             = BIAT2;
  IATbeforeCAMIc     = IATbeforeCAMI;
  BIAT_MIgood1stc    = BIAT_MIgood1st;
  feedbackcorrectc   = feedbackcorrect;
  timecamic          = timecami;
  timecamic60        = timecamic/60;   /* centred timecami divided by 60 */
  agec               = age;
  femalec            = female;
  black1white0c      = black1white0;
  edu3c              = edu3;
  edu3code_1vs2c     = edu3code_1vs2;
  edu3code_0vs1and2c = edu3code_0vs1and2;
  IMCcorrectc        = IMCcorrect;
  KEEP session_id
       BIATc BIAT1c BIAT2c IATbeforeCAMIc BIAT_MIgood1stc feedbackcorrectc
       timecamic timecamic60 agec femalec black1white0c
       edu3c edu3code_1vs2c edu3code_0vs1and2c IMCcorrectc;
RUN;

PROC CONTENTS DATA=centered;
RUN;
PROC FREQ DATA=centered;
  TABLES IATbeforeCAMIc;
RUN;

PROC SORT DATA=Clean.consentedus18; BY session_id; RUN;
PROC SORT DATA=centered;            BY session_id; RUN;

DATA Clean.consentedus18;
  MERGE Clean.consentedus18 centered;
  BY session_id;
RUN;
/****************************************************************************/
/* Same procedure, centring among rows where the BIAT score is not disqualified
   by our standard criteria (BIATdq = 0); results are stored under *cq names. */
PROC STANDARD DATA=Clean.consentedus18 MEAN=0 OUT=centered;
  WHERE BIATdq = 0;
  VAR BIAT IATbeforeCAMI BIAT_MIgood1st feedbackcorrect;
RUN;

DATA centered;
  SET centered;
  BIATcq            = BIAT;
  IATbeforeCAMIcq   = IATbeforeCAMI;
  BIAT_MIgood1stcq  = BIAT_MIgood1st;
  feedbackcorrectcq = feedbackcorrect;
  KEEP session_id BIATcq IATbeforeCAMIcq BIAT_MIgood1stcq feedbackcorrectcq;
RUN;

PROC CONTENTS DATA=centered;
RUN;
PROC FREQ DATA=centered;
  TABLES IATbeforeCAMIcq;
RUN;

PROC SORT DATA=Clean.consentedus18; BY session_id; RUN;
PROC SORT DATA=centered;            BY session_id; RUN;

DATA Clean.consentedus18;
  MERGE Clean.consentedus18 centered;
  BY session_id;
RUN;
/****************************************************************************/
/* Cross-checks the site feedback validity flag (feedbackvalid) against the
   locally computed exclusion flags (BIATexcl, BIATexclError, BIATexclFast, BIATdq). */
ODS HTML PATH=WebOut FILE="DESC.BIATexclusion.htm";
TITLE 'Stigma3 data = clean.consentedus18';

TITLE2 'Exclusion criteria BY feedbackvalid';
PROC SORT DATA=clean.consentedus18;
  BY feedbackvalid;
RUN;
PROC FREQ DATA=clean.consentedus18;
  BY feedbackvalid;
  TABLES BIATexcl BIATexclError BIATexclFast BIATdq;
RUN;

/* Listings of the rows on which the two validity sources disagree. */
TITLE2 'Exclusion criteria WHERE feedbackvalid = . AND BIATdq = 0';
PROC PRINT DATA=clean.consentedus18;
  WHERE feedbackvalid = . AND BIATdq = 0;
  VAR feedbackvalid BIATdq BIAT BIATexcl BIATexclError BIATexclFast;
RUN;

TITLE2 'Exclusion criteria WHERE feedbackvalid = 0 AND BIATdq = 0';
PROC PRINT DATA=clean.consentedus18;
  WHERE feedbackvalid = 0 AND BIATdq = 0;
  VAR feedbackvalid BIATdq BIAT BIATexcl BIATexclError BIATexclFast;
RUN;

TITLE2 'Exclusion criteria WHERE feedbackvalid = 1 AND BIATdq = 1';
PROC PRINT DATA=clean.consentedus18;
  WHERE feedbackvalid = 1 AND BIATdq = 1;
  VAR feedbackvalid BIATdq BIAT BIATexcl BIATexclError BIATexclFast;
RUN;

TITLE2 'Errors WHERE feedbackvalid = . AND BIAT NE .';
PROC PRINT DATA=clean.consentedus18;
  WHERE feedbackvalid = . AND BIAT NE .;
  VAR feedbackvalid BIAT BIATexcl BIATexclError BIATerrmean
      BIATeb4 BIATeb7 BIATfastmean BIATfb4 BIATfb7;
RUN;

TITLE2 'Errors WHERE feedbackvalid = 0';
/* Re-sort so the listing is ordered by the exclusion codes. */
PROC SORT DATA=clean.consentedus18;
  BY BIATexcl BIATexclError;
RUN;
PROC PRINT DATA=clean.consentedus18;
  WHERE feedbackvalid = 0;
  VAR feedbackvalid BIAT BIATexcl BIATexclError BIATerrmean
      BIATeb4 BIATeb7 BIATfastmean BIATfb4 BIATfb7;
RUN;

TITLE2 'OVERALL Exclusion criteria by typical procedures';
PROC FREQ DATA=clean.consentedus18;
  TABLES BIATexcl BIATexclError BIATexclFast BIATdq;
RUN;

TITLE2 'Exclusion criteria WHERE BIATexcl NE 2';
PROC FREQ DATA=clean.consentedus18;
  WHERE BIATexcl NE 2;
  TABLES BIATexcl BIATexclError BIATexclFast BIATdq;
RUN;

ODS HTML CLOSE;

/* Race distribution for consenters and for each level of validcamiandIAT. */
PROC FREQ DATA=Clean.consentedus18;
  WHERE consented = 1;
  TABLES raceomb;
RUN;
PROC FREQ DATA=Clean.consentedus18;
  WHERE validcamiandIAT = 0;
  TABLES raceomb;
RUN;
PROC FREQ DATA=Clean.consentedus18;
  WHERE validcamiandIAT = 1;
  TABLES raceomb;
RUN;

ODS HTML PATH=WebOut FILE="DESC.demographicsBYvalidcamiandIAT.htm";
TITLE 'consentedus18';

/* Chi-square tests: validcamiandIAT crossed with each demographic variable. */
PROC FREQ DATA=Clean.consentedus18; TABLES validcamiandIAT*age/CHISQ;          RUN;
PROC FREQ DATA=Clean.consentedus18; TABLES validcamiandIAT*sex/CHISQ;          RUN;
PROC FREQ DATA=Clean.consentedus18; TABLES validcamiandIAT*ethnicityomb/CHISQ; RUN;
PROC FREQ DATA=Clean.consentedus18; TABLES validcamiandIAT*raceomb/CHISQ;      RUN;
PROC FREQ DATA=Clean.consentedus18; TABLES validcamiandIAT*edu5/CHISQ;         RUN;
PROC FREQ DATA=Clean.consentedus18; TABLES validcamiandIAT*politicalid/CHISQ;  RUN;
PROC FREQ DATA=Clean.consentedus18; TABLES validcamiandIAT*relid/CHISQ;        RUN;

/* Demographics of the validcamiandIAT = 1 subsample only. */
TITLE2 'WHERE validcamiandIAT = 1';
PROC UNIVARIATE DATA=Clean.consentedus18;
  WHERE validcamiandIAT = 1;
  VAR age;
RUN;
PROC FREQ DATA=Clean.consentedus18; WHERE validcamiandIAT = 1; TABLES sex;          RUN;
PROC FREQ DATA=Clean.consentedus18; WHERE validcamiandIAT = 1; TABLES ethnicityomb; RUN;
PROC FREQ DATA=Clean.consentedus18; WHERE validcamiandIAT = 1; TABLES raceomb;      RUN;
PROC FREQ DATA=Clean.consentedus18; WHERE validcamiandIAT = 1; TABLES edu5;         RUN;
PROC FREQ DATA=Clean.consentedus18; WHERE validcamiandIAT = 1; TABLES politicalid;  RUN;
PROC FREQ DATA=Clean.consentedus18; WHERE validcamiandIAT = 1; TABLES relid;        RUN;

TITLE2 'WHERE age GE 25 AND validcamiandIAT = 1';
PROC FREQ DATA=Clean.consentedus18;
  WHERE validcamiandIAT = 1 AND age GE 25;
  TABLES edu5;
RUN;

/* BIATandCAMI crossed with each demographic variable within each IATbeforeCAMI
   group; rows with a missing IATbeforeCAMI are left out. */
TITLE2 'BIATandCAMI valid by condition, i.e., for those who at least reached first substantive task';
PROC SORT DATA=Clean.consentedus18;
  BY IATbeforeCAMI;
RUN;
PROC FREQ DATA=Clean.consentedus18;
  WHERE IATbeforeCAMI NE .;
  BY IATbeforeCAMI;
  TABLES BIATandCAMI*age/CHISQ;
RUN;
PROC FREQ DATA=Clean.consentedus18;
  WHERE IATbeforeCAMI NE .;
  BY IATbeforeCAMI;
  TABLES BIATandCAMI*sex/CHISQ;
RUN;
PROC FREQ DATA=Clean.consentedus18;
  WHERE IATbeforeCAMI NE .;
  BY IATbeforeCAMI;
  TABLES BIATandCAMI*ethnicityomb/CHISQ;
RUN;
/* The race table is limited to raceomb codes 5, 6, 8 and 9. */
PROC FREQ DATA=Clean.consentedus18;
  WHERE IATbeforeCAMI NE . AND raceomb IN (5, 6, 8, 9);
  BY IATbeforeCAMI;
  TABLES BIATandCAMI*raceomb/CHISQ;
RUN;
PROC FREQ DATA=Clean.consentedus18;
  WHERE IATbeforeCAMI NE .;
  BY IATbeforeCAMI;
  TABLES BIATandCAMI*edu5/CHISQ;
RUN;
PROC FREQ DATA=Clean.consentedus18;
  WHERE IATbeforeCAMI NE .;
  BY IATbeforeCAMI;
  TABLES BIATandCAMI*politicalid/CHISQ;
RUN;
PROC FREQ DATA=Clean.consentedus18;
  WHERE IATbeforeCAMI NE .;
  BY IATbeforeCAMI;
  TABLES BIATandCAMI*relid/CHISQ;
RUN;

ODS HTML CLOSE;

/* Distributions of the block-level BIAT*eb* and BIAT*fb* variables for sessions
   whose feedbackcorrect code is above 3, listed per feedbackcorrect value.
   The sort now ends with an explicit RUN; so that it executes before the ODS HTML
   and TITLE global statements, instead of relying on the next PROC statement to
   act as the step boundary. */
PROC SORT DATA = clean.consentedus18; BY feedbackcorrect; RUN;
ODS HTML PATH=WebOut FILE="DESC.BIATerrors.WHEREfeedbackinvalid.htm";
TITLE 'Stigma3 data = clean.consentedus18';
TITLE2 'BY feedbackcorrect WHERE feedback invalid';
PROC FREQ DATA = clean.consentedus18; 
 tables BIATerrmean BIATeb3 BIATeb4 BIATeb6 BIATeb7 BIATfb3 BIATfb4 BIATfb6 BIATfb7; 
 BY feedbackcorrect; WHERE feedbackcorrect > 3; run;
ODS HTML CLOSE;

/* feedbackDQmatch: missing when BIAT equals .; otherwise 1 when BIATeb4 or
   BIATeb7 exceeds .4 or BIATfastmean is at least .1; otherwise 0.
   The flag is then tabulated within each level of feedbackinvalid. */
DATA clean.consentedus18;
  SET clean.consentedus18;
  IF BIAT = . THEN feedbackDQmatch = .;
  ELSE IF BIATeb4 > .4 OR BIATeb7 > .4 OR BIATfastmean GE .1 THEN feedbackDQmatch = 1;
  ELSE feedbackDQmatch = 0;
RUN;

PROC SORT DATA=clean.consentedus18;
  BY feedbackinvalid;
RUN;
PROC FREQ DATA=clean.consentedus18;
  BY feedbackinvalid;
  TABLES feedbackDQmatch;
RUN;

/* Page 1: BIAT descriptives and exclusion-flag frequencies before any exclusion. */
ODS HTML PATH=WebOut FILE="DESC.BIAT.exclusion.1.potential.htm";
TITLE 'Menatti Data';
TITLE2 'BIAT pre-exclusions';
PROC MEANS DATA=clean.consentedus18;
  VAR BIAT;
RUN;
PROC FREQ DATA=clean.consentedus18;
  TABLES BIATexcl BIATexclError BIATexclFast;
RUN;
ODS HTML CLOSE;

/* Page 2: BIAT descriptives for retained (BIATdq = 0) and excluded (BIATdq = 1) rows. */
ODS HTML PATH=WebOut FILE="DESC.BIAT.exclusion.2.cleaned.htm";
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'BIAT after-exclusions';
PROC MEANS DATA=clean.consentedus18;
  WHERE BIATdq = 0;
  VAR BIAT BIAT1 BIAT2;
RUN;
TITLE2 'BIATs excluded';
PROC MEANS DATA=clean.consentedus18;
  WHERE BIATdq = 1;
  VAR BIAT BIAT1 BIAT2;
RUN;
ODS HTML CLOSE;

/* BIAT internal consistency. For each subsample the section prints N/mean/min/max
   of BIAT1, BIAT2 and BIAT, then PROC CORR with ALPHA and NOMISS for the
   split-half pair (BIAT1, BIAT2) and the alternating-couplet pair(s)
   (BIATy/BIATz, and for two subsamples also BIATy1/BIATz1 and BIATy2/BIATz2). */
ODS HTML PATH=WebOut FILE="DESC.BIATinternalconsistency.htm";

/* ---- Subsample: biatandcami = 1 ---- */
TITLE 'Stigma3 BIAT internal consistency; WHERE BOTH BIAT and CAMI VALID';
PROC MEANS DATA=clean.consentedus18 N MEAN MIN MAX;
  WHERE biatandcami = 1;
  VAR BIAT1 BIAT2 BIAT;
RUN;
TITLE2 'Split-halves (12 & 24 trial blocks)';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE biatandcami = 1;
  VAR BIAT1 BIAT2;
RUN;
TITLE2 'Overall Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE biatandcami = 1;
  VAR BIATy BIATz;
RUN;
TITLE2 'Block 3 & 6 Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE biatandcami = 1;
  VAR BIATy1 BIATz1;
RUN;
TITLE2 'Block 4 & 7 Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE biatandcami = 1;
  VAR BIATy2 BIATz2;
RUN;

/* ---- Subsample: feedbackinvalid = 0 ---- */
TITLE 'Stigma3 BIAT internal consistency; VALID BIAT Feedback';
PROC MEANS DATA=clean.consentedus18 N MEAN MIN MAX;
  WHERE feedbackinvalid = 0;
  VAR BIAT1 BIAT2 BIAT;
RUN;
TITLE2 'Split-halves (12 & 24 trial blocks)';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE feedbackinvalid = 0;
  VAR BIAT1 BIAT2;
RUN;
TITLE2 'Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE feedbackinvalid = 0;
  VAR BIATy BIATz;
RUN;

/* ---- Subsample: feedbackinvalid = 1 ---- */
TITLE 'Stigma3 BIAT internal consistency; INVALID BIAT Feedback';
PROC MEANS DATA=clean.consentedus18 N MEAN MIN MAX;
  WHERE feedbackinvalid = 1;
  VAR BIAT1 BIAT2 BIAT;
RUN;
TITLE2 'Split-halves (12 & 24 trial blocks)';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE feedbackinvalid = 1;
  VAR BIAT1 BIAT2;
RUN;
TITLE2 'Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE feedbackinvalid = 1;
  VAR BIATy BIATz;
RUN;

/* ---- Subsample: BIATdq = 0 ---- */
TITLE 'Stigma3 BIAT internal consistency; BIATdq = 0';
PROC MEANS DATA=clean.consentedus18 N MEAN MIN MAX;
  WHERE BIATdq = 0;
  VAR BIAT1 BIAT2 BIAT;
RUN;
TITLE2 'Split-halves (12 & 24 trial blocks)';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATdq = 0;
  VAR BIAT1 BIAT2;
RUN;
TITLE2 'Overall Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATdq = 0;
  VAR BIATy BIATz;
RUN;
TITLE2 'Block 3 & 6 Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATdq = 0;
  VAR BIATy1 BIATz1;
RUN;
TITLE2 'Block 4 & 7 Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATdq = 0;
  VAR BIATy2 BIATz2;
RUN;

/* ---- Subsample: BIATdq = 1 ---- */
TITLE 'Stigma3 BIAT internal consistency; BIAT disqualified';
PROC MEANS DATA=clean.consentedus18 N MEAN MIN MAX;
  WHERE BIATdq = 1;
  VAR BIAT1 BIAT2 BIAT;
RUN;
TITLE2 'Split-halves (12 & 24 trial blocks)';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATdq = 1;
  VAR BIAT1 BIAT2;
RUN;
TITLE2 'Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATdq = 1;
  VAR BIATy BIATz;
RUN;

/* ---- By level of BIATexcl (missing levels left out) ---- */
TITLE 'Stigma3 BIAT internal consistency; BY BIATexcl';
PROC SORT DATA=clean.consentedus18;
  BY BIATexcl;
RUN;
PROC MEANS DATA=clean.consentedus18 N MEAN MIN MAX;
  WHERE BIATexcl NE .;
  BY BIATexcl;
  VAR BIAT1 BIAT2 BIAT;
RUN;
TITLE2 'Split-halves (12 & 24 trial blocks)';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATexcl NE .;
  BY BIATexcl;
  VAR BIAT1 BIAT2;
RUN;
TITLE2 'Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATexcl NE .;
  BY BIATexcl;
  VAR BIATy BIATz;
RUN;

/* ---- By level of BIATexclError (missing levels left out) ---- */
TITLE 'Stigma3 BIAT internal consistency; BY BIATexclError';
PROC SORT DATA=clean.consentedus18;
  BY BIATexclError;
RUN;
PROC MEANS DATA=clean.consentedus18 N MEAN MIN MAX;
  WHERE BIATexclError NE .;
  BY BIATexclError;
  VAR BIAT1 BIAT2 BIAT;
RUN;
TITLE2 'Split-halves (12 & 24 trial blocks)';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATexclError NE .;
  BY BIATexclError;
  VAR BIAT1 BIAT2;
RUN;
TITLE2 'Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATexclError NE .;
  BY BIATexclError;
  VAR BIATy BIATz;
RUN;

/* ---- By level of BIATexclFast (missing levels left out) ---- */
TITLE 'Stigma3 BIAT internal consistency; BY BIATexclFast';
PROC SORT DATA=clean.consentedus18;
  BY BIATexclFast;
RUN;
PROC MEANS DATA=clean.consentedus18 N MEAN MIN MAX;
  WHERE BIATexclFast NE .;
  BY BIATexclFast;
  VAR BIAT1 BIAT2 BIAT;
RUN;
TITLE2 'Split-halves (12 & 24 trial blocks)';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATexclFast NE .;
  BY BIATexclFast;
  VAR BIAT1 BIAT2;
RUN;
TITLE2 'Alternating Couplets';
PROC CORR DATA=clean.consentedus18 ALPHA NOMISS;
  WHERE BIATexclFast NE .;
  BY BIATexclFast;
  VAR BIATy BIATz;
RUN;

ODS HTML CLOSE;

/* DESC.feedback.htm: frequency tables of feedback and feedbackcorrect,
   first for the whole file and then split by IATbeforeCAMI. */
ODS HTML PATH=WebOut FILE="DESC.feedback.htm";
TITLE 'DATA = Menatti Stigma3 consentedus18';

TITLE2 'Overall';
PROC FREQ DATA = Clean.consentedus18;
  TABLES feedback;
RUN;
PROC FREQ DATA = Clean.consentedus18;
  TABLES feedbackcorrect;
RUN;

TITLE2 'BY IATbeforeCAMI';
/* BY-group processing needs the data ordered by the BY variable. */
PROC SORT DATA = Clean.consentedus18;
  BY IATbeforeCAMI;
RUN;
PROC FREQ DATA = Clean.consentedus18;
  BY IATbeforeCAMI;
  TABLES feedbackcorrect;
RUN;
ODS HTML CLOSE;

/* DESC.CAMI.htm: descriptive report for the CAMI items and scores.
   Covers: number of items answered, distribution of camitotal / camimean
   (overall and for feedbackcorrect between -3 and 3), camivalid by
   IATbeforeCAMI, and tasksseen by IATbeforeCAMI. */
ODS HTML PATH=WebOut FILE="DESC.CAMI.htm";
TITLE 'Stigma3: CAMI, higher = more MI discrimination'; 
TITLE2 'Items completed WHEN cami was at least reached'; 
  PROC FREQ DATA = clean.consentedus18; TABLES camiresponses; WHERE ordcami NE .; RUN;
TITLE2 'Items completed WHEN at least one CAMI item was completed'; 
  PROC FREQ DATA = clean.consentedus18; TABLES camiresponses; WHERE camiresponses > 0; RUN;
TITLE2 'Scores, total and mean, for Ps answering all'; 
TITLE3 '(only 16 of 759 who answered ANY cami did less than all)'; 
PROC FREQ DATA = clean.consentedus18; TABLES camitotal camimean; RUN;
PROC UNIVARIATE DATA = clean.consentedus18 PLOT; VAR camimean; RUN;
/* Resetting TITLE2 also clears the TITLE3 set above. */
TITLE2 'Scores for Ps WITH VALID BIAT FEEDBACK'; 
PROC UNIVARIATE DATA = clean.consentedus18 PLOT; VAR camimean; WHERE feedbackcorrect GE -3 AND feedbackcorrect LE 3; RUN;
PROC FREQ DATA = clean.consentedus18; TABLES camitotal camimean; WHERE feedbackcorrect GE -3 AND feedbackcorrect LE 3; RUN;
TITLE2 'Valid CAMI by IATbeforeCAMI'; 
/* Sort in place so the BY statements below are valid. */
PROC SORT DATA = clean.consentedus18; BY IATbeforeCAMI; RUN;
PROC FREQ DATA = clean.consentedus18; TABLES camivalid; BY IATbeforeCAMI; WHERE IATbeforeCAMI NE .; RUN;
TITLE2 'Even SEE the CAMI (by IATbeforeCAMI)'; 
/* Two explanatory lines are wanted here. The second one has to be TITLE4:
   a second TITLE3 statement would replace the first instead of adding to it,
   and the IATbeforeCAMI = 0 note would never be printed. */
TITLE3 'When IATbeforeCAMI = 0, seeing CAMI = tasksseen 4';
TITLE4 'When IATbeforeCAMI = 1, seeing CAMI = tasksseen 7';
PROC FREQ DATA = clean.consentedus18; TABLES tasksseen; BY IATbeforeCAMI; WHERE IATbeforeCAMI NE .; RUN;
ODS HTML CLOSE;

/* DESC.CAMIinternalconsistency.htm: Cronbach's alpha (ALPHA) for the ten
   CAMI items, listwise deletion (NOMISS), restricted to biatandcami = 1;
   reported overall and then within each IATbeforeCAMI group. */
ODS HTML PATH=WebOut FILE="DESC.CAMIinternalconsistency.htm";
TITLE 'Stigma3 CAMI internal consistency; WHERE BOTH BIAT and CAMI VALID';

TITLE2 'OVERALL';
PROC CORR DATA = clean.consentedus18 ALPHA NOMISS;
  WHERE biatandcami = 1;
  VAR cami1-cami10;   /* numbered range list: cami1, cami2, ..., cami10 */
RUN;

PROC SORT DATA = clean.consentedus18;
  BY IATbeforeCAMI;
RUN;
TITLE2 'BY IATbeforeCAMI';
PROC CORR DATA = clean.consentedus18 ALPHA NOMISS;
  WHERE biatandcami = 1;
  BY IATbeforeCAMI;
  VAR cami1-cami10;
RUN;
ODS HTML CLOSE;

/* DESC.BIATandCAMI.WHEREbothValid.htm: default PROC MEANS statistics for
   biat and camimean within each IATbeforeCAMI group, biatandcami = 1 only. */
ODS HTML PATH=WebOut FILE="DESC.BIATandCAMI.WHEREbothValid.htm";
TITLE 'Stigma3 for valid completers both BIAT and CAMI';
TITLE2 'WHERE biatandcami = 1';
PROC SORT DATA = clean.consentedus18;
  BY IATbeforeCAMI;
RUN;
PROC MEANS DATA = clean.consentedus18;
  WHERE biatandcami = 1;
  BY IATbeforeCAMI;
  VAR biat camimean;
RUN;
ODS HTML CLOSE;

/* Ad hoc checks run after ODS HTML CLOSE, so they are not written to the
   HTML file above, and they still carry the two titles set above.
   They compare biatandcami = 1 with (feedbackvalid = 1 AND camimean NE .)
   and list any rows meeting the latter whose biat is missing.
   NOTE(review): other sections use feedbackinvalid; confirm that feedbackvalid
   also exists on the data set. */
PROC MEANS DATA = clean.consentedus18;
  WHERE biatandcami = 1;
  VAR biat;
RUN;
PROC MEANS DATA = clean.consentedus18;
  WHERE feedbackvalid = 1 AND camimean NE .;
  VAR biat;
RUN;
PROC MEANS DATA = clean.consentedus18;
  WHERE feedbackvalid = 1 AND camimean NE .;
  VAR camimean;
RUN;
PROC PRINT DATA = clean.consentedus18;
  WHERE feedbackvalid = 1 AND camimean NE . AND biat = .;
  VAR biat1 biat2 biat camimean;
RUN;

/* Two reports that split the sample on feedbackcorrect:
   values from -3 through 3 versus values above 3. */

/* DESC.BIATscoreBYfeedback.htm: correlations among BIAT1, BIAT2 and BIAT. */
ODS HTML PATH=WebOut FILE="DESC.BIATscoreBYfeedback.htm";
TITLE 'Stigma3, data = clean.consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
PROC CORR DATA = clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  VAR BIAT1 BIAT2 BIAT;
RUN;
TITLE2 'WHERE feedback was invalid (i.e., "too many errors" or "too fast")';
PROC CORR DATA = clean.consentedus18;
  WHERE feedbackcorrect GT 3;
  VAR BIAT1 BIAT2 BIAT;
RUN;
ODS HTML CLOSE;

/* DESC.BIAT.BYvalidityOFfeedback.htm: distribution of BIAT, with plots. */
ODS HTML PATH=WebOut FILE="DESC.BIAT.BYvalidityOFfeedback.htm";
TITLE 'stigma3, data = clean.consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
PROC UNIVARIATE DATA = clean.consentedus18 PLOT;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  VAR BIAT;
RUN;
TITLE2 'WHERE feedback was invalid (i.e., "too many errors" or "too fast")';
PROC UNIVARIATE DATA = clean.consentedus18 PLOT;
  WHERE feedbackcorrect GT 3;
  VAR BIAT;
RUN;
ODS HTML CLOSE;

/* CORR.IATandCAMI.htm: correlations of camimean with the BIAT scores under
   several sample restrictions. The data are sorted by IATbeforeCAMI first
   because two of the steps use BY IATbeforeCAMI. */
PROC SORT DATA = clean.consentedus18;
  BY IATbeforeCAMI;
RUN;

ODS HTML PATH=WebOut FILE="CORR.IATandCAMI.htm";
TITLE 'Stigma3 consentedus18';

TITLE2 'Overall';
PROC CORR DATA = Clean.consentedus18;
  VAR camimean BIAT1 BIAT2 BIAT;
RUN;

/* feedbackcorrect from -3 through 3; this step also includes BIATy and BIATz */
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
PROC CORR DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  VAR camimean BIAT1 BIAT2 BIATy BIATz BIAT;
RUN;
TITLE3 'BY IATbeforeCAMI';
PROC CORR DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  BY IATbeforeCAMI;
  VAR camimean BIAT1 BIAT2 BIAT;
RUN;

/* BIATdq = 0 */
TITLE2 'WHERE our usual IAT DQs were made (regardless of feedback)';
PROC CORR DATA = Clean.consentedus18;
  WHERE BIATdq = 0;
  VAR camimean BIAT1 BIAT2 BIAT;
RUN;
TITLE3 'BY IATbeforeCAMI';
PROC CORR DATA = Clean.consentedus18;
  WHERE BIATdq = 0;
  BY IATbeforeCAMI;
  VAR camimean BIAT1 BIAT2 BIAT;
RUN;

/* feedbackcorrect missing or above 3 */
TITLE2 'WHERE feedback was missing OR "too many errors" OR "too fast")';
PROC CORR DATA = Clean.consentedus18;
  WHERE feedbackcorrect = . OR feedbackcorrect > 3;
  VAR camimean BIAT1 BIAT2 BIAT;
RUN;
ODS HTML CLOSE;


/* DESC.BIATscoreBYbiatorder.htm: BIAT regressed on the BIAT counterbalance
   indicator, for feedbackcorrect from -3 through 3. The third model uses the
   "c"-suffixed predictors and adds IATbeforeCAMIc and the interaction. */
ODS HTML PATH=WebOut FILE="DESC.BIATscoreBYbiatorder.htm";
TITLE 'Stigma3, data = clean.consentedus18';

TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
PROC GLM DATA = clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL BIAT = BIAT_MIgood1st;
RUN;

TITLE2 'WHERE feedback was valid (i.e., not "too many errors") AND IATbeforeCAMIc NE missing';
PROC GLM DATA = clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND IATbeforeCAMIc NE .;
  MODEL BIAT = BIAT_MIgood1st;
RUN;
PROC GLM DATA = clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL BIAT = BIAT_MIgood1stc|IATbeforeCAMIc;
RUN;
ODS HTML CLOSE;

/* GLM.BIAT.on.Demographics.htm: four separate models of BIAT, one per
   demographic predictor set, each for feedbackcorrect from -3 through 3. */
ODS HTML PATH=WebOut FILE="GLM.BIAT.on.Demographics.htm";
TITLE 'Stigma3, data = clean.consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';

PROC GLM DATA = clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL BIAT = agec;
RUN;
PROC GLM DATA = clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL BIAT = femalec;
RUN;
PROC GLM DATA = clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL BIAT = black1white0c;
RUN;
/* education enters as two contrast-coded predictors */
PROC GLM DATA = clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL BIAT = edu3code_1vs2c edu3code_0vs1and2c;
RUN;
ODS HTML CLOSE;

/* Quick look at the outcome, predictor and filter variable; this step runs
   between HTML files, so it is not captured in either of them. */
PROC MEANS DATA = Clean.consentedus18;
  VAR camimean IATbeforeCAMIc biatandcami;
RUN;


/* REG.CAMIonIATorder.validFeedback.htm: camimean regressed on IATbeforeCAMIc
   with standardized estimates (STB), for rows with biatandcami = 1 and a
   non-missing biat2. */
ODS HTML PATH=WebOut FILE="REG.CAMIonIATorder.validFeedback.htm";
TITLE 'Stigma3 consentedus18' ;
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
TITLE3 'WHERE BIAT2 not missing';
PROC REG DATA = Clean.consentedus18;
  WHERE biat2 NE . AND biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc / STB;
RUN;
ODS HTML CLOSE;

/* GLM.CAMIonIATorder.validFeedback.htm: series of models of camimean, all
   restricted to biatandcami = 1. The HTML destination opened here stays open
   through the diagnostics and outlier refits that follow this series. */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.validFeedback.htm";
TITLE 'Stigma3 consentedus18' ;
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';

/* ---- Order condition and feedback ---- */
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc;
RUN;

TITLE3 'Include effect of feedback';
/* This model also writes WORK.PRED with the fitted value, studentized
   residual, Cook's D, leverage and DFFITS used by the diagnostics below. */
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc;
  OUTPUT OUT=PRED P=YHAT RSTUDENT=resid COOKD=cookd H=lever DFFITS=dffit;
RUN;

TITLE3 'Include effects of feedback and interaction';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc;
RUN;

/* ---- Adding the BIAT counterbalance condition, then trimming terms ---- */
TITLE3 'Include effect of BIAT counterbalance condition, ALL EFFECTS';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc|feedbackcorrectc|BIAT_MIgood1stc;
RUN;

TITLE3 'Include effect of BIAT counterbalance condition, DROP NS feedback*IATbeforeCAMI';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc BIAT_MIgood1stc
                   IATbeforeCAMIc*BIAT_MIgood1stc
                   feedbackcorrectc*BIAT_MIgood1stc
                   IATbeforeCAMIc*feedbackcorrectc*BIAT_MIgood1stc;
RUN;

TITLE3 'Include effect of BIAT counterbalance condition, DROP 3-way interaction';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc BIAT_MIgood1stc
                   IATbeforeCAMIc*BIAT_MIgood1stc
                   feedbackcorrectc*BIAT_MIgood1stc;
RUN;

TITLE3 'Include effect of BIAT counterbalance condition, DROP NS IATbeforeCAMIc*BIAT_MIgood1stc interaction';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc BIAT_MIgood1stc
                   feedbackcorrectc*BIAT_MIgood1stc;
RUN;

/* ---- Overall BIAT score as predictor ---- */
TITLE3 'Include effect of BIAT';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc BIATc;
RUN;
TITLE3 'Include effect of BIAT and interaction';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc BIATc IATbeforeCAMIc*BIATc;
RUN;
TITLE3 'Include effect of BIAT counterbalance condition';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc|BIATc|BIAT_MIgood1stc;
RUN;

/* ---- BIAT1 as predictor ---- */
TITLE3 'BIAT1';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc BIAT1c;
RUN;
TITLE3 'BIAT1 and interaction';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc BIAT1c IATbeforeCAMIc*BIAT1c;
RUN;
TITLE3 'Include effect of BIAT counterbalance condition';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc|BIAT1c|BIAT_MIgood1stc;
RUN;

/* ---- BIAT2 as predictor; the first model only adds the biat2 NE . filter ---- */
TITLE3 'WHERE BIAT2 not missing';
PROC GLM DATA = Clean.consentedus18;
  WHERE biat2 NE . AND biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc;
RUN;
TITLE3 'BIAT2';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc BIAT2c;
RUN;
TITLE3 'BIAT2 and interaction';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc BIAT2c IATbeforeCAMIc*BIAT2c;
RUN;
TITLE3 'Include effect of BIAT counterbalance condition';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc|BIAT2c|BIAT_MIgood1stc;
RUN;

/* Influence diagnostics from WORK.PRED. For each statistic the data are
   sorted by it, its distribution is plotted, and the cases are listed in
   sorted order. Rows with a missing studentized residual are left out of
   the UNIVARIATE and PRINT steps; the PROC FREQ steps apply no filter. */
TITLE 'Regression Diagnostics';
TITLE2 'camimean = IATbeforeCAMIc feedbackcorrectc';

/* ---- Studentized residual ---- */
PROC SORT DATA = pred;
  BY resid camimean feedbackcorrect IATbeforeCAMI;
RUN;
PROC UNIVARIATE DATA = pred PLOT;
  WHERE resid NE .;
  VAR resid;
RUN;
PROC PRINT DATA = pred NOOBS;
  WHERE resid NE .;
  VAR resid session_id camimean feedbackcorrect IATbeforeCAMI;
RUN;

/* ---- Leverage ---- */
PROC SORT DATA = pred;
  BY lever camimean feedbackcorrect IATbeforeCAMI;
RUN;
PROC UNIVARIATE DATA = pred PLOT;
  WHERE resid NE .;
  VAR lever;
RUN;
PROC PRINT DATA = pred NOOBS;
  WHERE resid NE .;
  VAR lever session_id camimean feedbackcorrect IATbeforeCAMI;
RUN;
PROC FREQ DATA = pred;
  TABLES lever;
RUN;

/* ---- Cook's D ---- */
PROC SORT DATA = pred;
  BY cookd camimean feedbackcorrect IATbeforeCAMI;
RUN;
PROC UNIVARIATE DATA = pred PLOT;
  WHERE resid NE .;
  VAR cookd;
RUN;
PROC PRINT DATA = pred NOOBS;
  WHERE resid NE .;
  VAR cookd session_id camimean feedbackcorrect IATbeforeCAMI;
RUN;
PROC FREQ DATA = pred;
  TABLES cookd;
RUN;

/* ---- DFFITS ---- */
PROC SORT DATA = pred;
  BY dffit camimean feedbackcorrect IATbeforeCAMI;
RUN;
PROC UNIVARIATE DATA = pred PLOT;
  WHERE resid NE .;
  VAR dffit;
RUN;
PROC PRINT DATA = pred NOOBS;
  WHERE resid NE .;
  VAR dffit session_id camimean feedbackcorrect IATbeforeCAMI;
RUN;
PROC FREQ DATA = pred;
  TABLES dffit;
RUN;

/* Refit on WORK.PRED after dropping influential cases: keeps rows with
   Cook's D at or below .0065 and DFFITS within +/- .114 (the cutoffs worked
   out in the notes further down this file). */
TITLE 'STRICTEST OUTLIER TEST';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
PROC GLM DATA = pred;
  WHERE biatandcami = 1
    AND cookd LE .0065
    AND (dffit BETWEEN -.114 AND .114);
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc;
RUN;

TITLE3 'Test interaction, tho not in the original model tested for influence';
PROC GLM DATA = pred;
  WHERE biatandcami = 1
    AND cookd LE .0065
    AND (dffit BETWEEN -.114 AND .114);
  MODEL camimean = IATbeforeCAMIc|feedbackcorrectc;
RUN;
QUIT;
ODS HTML CLOSE;

/* WORK.plot: copy of the clean data plus condXbiatgood1st, a 0-3 code for
   the four IATbeforeCAMI x BIAT_MIgood1st cells:
     0 = (0,0)   1 = (0,1)   2 = (1,0)   3 = (1,1)
   Any other combination, including missing values, is coded missing. */
DATA plot;
  SET Clean.consentedus18;
  IF IATbeforeCAMI IN (0, 1) AND BIAT_MIgood1st IN (0, 1)
    THEN condXbiatgood1st = 2*IATbeforeCAMI + BIAT_MIgood1st;
    ELSE condXbiatgood1st = .;
RUN;

/* Regression-line plot of camimean on BIAT2, one line per condXbiatgood1st
   cell (I=R draws the fitted line, V=NONE hides the points). Cells 0 and 1
   are blue and cells 2 and 3 are red; line type L=2 goes with cells 0 and 2,
   L=1 with cells 1 and 3. Limited to biatandcami = 1, camimean below 4 and
   BIAT2 below 1.2. */
TITLE ' ';
SYMBOL1 I=R MODE=INCLUDE V=NONE CI=blue W=2 L=2;
SYMBOL2 I=R MODE=INCLUDE V=NONE CI=blue W=2 L=1;
SYMBOL3 I=R MODE=INCLUDE V=NONE CI=red W=2 L=2;
SYMBOL4 I=R MODE=INCLUDE V=NONE CI=red W=2 L=1;
/* vertical axis: CAMI mean */
AXIS1 ORDER  = (1.7 to 2.7 by .2)
      LABEL  = (A=90 F=SWISSX h=1.3 'CAMI Mean')
      MINOR  = none
      OFFSET = (2);
/* horizontal axis: BIAT score */
AXIS2 ORDER  = (-1.2 to 1.2 by .2)
      LABEL  = (F=SWISSX H=1.3 'BIAT')
      MINOR  = none
      OFFSET = (2);
PROC GPLOT DATA=plot;
  WHERE biatandcami = 1 AND camimean LT 4 AND BIAT2 < 1.2;
  PLOT camimean * BIAT2 = condXbiatgood1st / NOLEGEND VAXIS=AXIS1 HAXIS=AXIS2;
RUN;
QUIT;

/* Same plot collapsed over order condition: one purple regression line per
   BIAT_MIgood1st value (line types L=2 and L=1). Only SYMBOL1 and SYMBOL2
   are redefined here. */
TITLE ' ';
SYMBOL1 I=R MODE=INCLUDE V=NONE CI=purple W=2 L=2;
SYMBOL2 I=R MODE=INCLUDE V=NONE CI=purple W=2 L=1;
/* vertical axis: CAMI mean */
AXIS1 ORDER  = (1.7 to 2.7 by .2)
      LABEL  = (A=90 F=SWISSX h=1.3 'CAMI Mean')
      MINOR  = none
      OFFSET = (2);
/* horizontal axis: BIAT score */
AXIS2 ORDER  = (-1.2 to 1.2 by .2)
      LABEL  = (F=SWISSX H=1.3 'BIAT')
      MINOR  = none
      OFFSET = (2);
PROC GPLOT DATA=plot;
  WHERE biatandcami = 1 AND camimean LT 4 AND BIAT2 < 1.2;
  PLOT camimean * BIAT2 = BIAT_MIgood1st / NOLEGEND VAXIS=AXIS1 HAXIS=AXIS2;
RUN;
QUIT;
/* Extremes: 
(notes on conventions from Cohen, Cohen, West & Aiken (2003) 
 and Chen et al. from http://www.ats.ucla.edu/stat/sas/webbooks/reg/chapter2/sasreg2.htm)

Cohen et al.:
Significance of largest studentized residual: 
Bonferroni procedure = alpha/n. So .05/615 = 0.00008.  
For df = (n - k - 1) = (615 - 2 -1) = 612 */ 
/* Two-tailed critical t for a given alpha (pr) and error df (ddf):
   tvalue = TINV(1 - pr/2, ddf). One input row: pr = 0.00008, ddf = 612. */
DATA crittvals;
  INPUT pr ddf;
  tvalue = TINV(1 - (pr / 2), ddf);
  DATALINES;
0.00008  612
;
RUN;
PROC PRINT DATA = crittvals;
  VAR pr ddf tvalue;
RUN;
/*
                                                Obs      pr      ddf     tvalue

                                                 1     .00008    612    3.97124
*/
/*
THEREFORE 
crit two-tailed value of t for .00008 = 3.97124, so look for |resid| > 3.97124:
 NONE (Largest resid = 3.86)
*/
/**************************************************************************************
LEVERAGE:

Cohen et al.: 
"For large samples... 2(k + 1)/n" = 2(2 + 1)/615 = 6/615 = 0.00975
"For small samples... 3(k + 1)/n" = 3(2 + 1)/615 = 9/615 = 0.0146

Chen et al.:
Generally, a point with leverage greater than (2k+2)/n should be carefully examined, 
  where k is the number of predictors and n is the number of observations. 
  = (2*2 + 2)/615 = 6/615 = 0.00975

THEREFORE
By Cohen large sample (undefined) and Chen, lever > 0.00975: NONE
By Cohen small sample (undefined), lever > 0.0146: NONE
*/
/**************************************************************************************
COOK'S D

Cohen et al.: 1.0 or critical value of F distribution at alpha=.50 with df = (k+1, n-k-1)
calculator at http://www.pindling.org/Math/Statistics/Textbook/Functions/FDist/FDist_05.htm
 (downloaded excel file to My Documents/Methods/Regression diagnostics)
 When df = (3,612), F = .79

Chen et al.:
The conventional cut-off point is 4/n = 4/615 = 0.0065

THEREFORE
BY COHEN, cookd > .79: NONE (largest = 0.029)
BY CHEN, cookd > .0065: 24 participants
*/
/**************************************************************************************
DFITS

Cohen et al.: 
Conventional cutoff "in small or medium sized data sets" is |DFITS| > 1.
"...in large data sets," |DFITS| > 2*sqrt((k + 1)/n)
 = 2*sqrt(3/615) = 2*sqrt(0.0049) = 2*0.07 = 0.14

Chen et al.:
The conventional cut-off point for DFITS is (pos or neg) 2*sqrt(k/n) = 2*sqrt(2/615) = 2*sqrt(0.0033) = 2*0.057 = 0.114

THEREFORE
BY Cohen (small/medium) |dffit| > 1: NONE
BY Cohen (large) |dffit| > .14: 10low+14high = 24 Ps
BY Chen (conventional) |dffit| > .114: 29low+31high = 60 Ps
*/
/**************************************************************************************/

/* GLM.CAMIonIATorder.validFeedback.BIAT_MIgood1stc.htm: effect of the BIAT
   counterbalance indicator on camimean, first within IATbeforeCAMI = 1 and
   then as an interaction with order condition; biatandcami = 1 throughout. */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.validFeedback.BIAT_MIgood1stc.htm";
TITLE 'Stigma3 consentedus18' ;
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';

TITLE3 'WHERE IATbeforeCAMI = 1';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1 AND IATbeforeCAMI = 1;
  MODEL camimean = BIAT_MIgood1stc;
RUN;

TITLE3 'TEST INTERACTION';
PROC GLM DATA = Clean.consentedus18;
  WHERE biatandcami = 1;
  MODEL camimean = IATbeforeCAMIc|BIAT_MIgood1stc;
RUN;
QUIT;
ODS HTML CLOSE;

/* GLM.CAMIonIATorder.validFeedback.DEMOGRAPH.htm: adds each demographic
   covariate to the order-by-feedback model of camimean, for feedbackcorrect
   from -3 through 3. Most covariates get a full-factorial model followed by
   a reduced one. */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.validFeedback.DEMOGRAPH.htm";
TITLE 'Stigma3 consentedus18' ;
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';

/* ---- Age ---- */
TITLE3 'Include effect of agec';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = IATbeforeCAMIc|feedbackcorrectc|agec;
RUN;
TITLE3 'Include effect of agec, drop NS interactions';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc agec;
RUN;

/* ---- Gender ---- */
TITLE3 'Include effect of femalec';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = IATbeforeCAMIc|feedbackcorrectc|femalec;
RUN;
TITLE3 'Include effect of femalec, drop NS interactions';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc femalec;
RUN;

/* ---- Race (full-factorial model only) ---- */
TITLE3 'Include effect of black1white0c';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = IATbeforeCAMIc|feedbackcorrectc|black1white0c;
RUN;

/* ---- Education: two contrasts, two-way interactions written out ---- */
TITLE3 'Include effect of edu3 contrasts';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc
                   edu3code_1vs2c edu3code_0vs1and2c
                   edu3code_1vs2c*IATbeforeCAMIc
                   edu3code_0vs1and2c*IATbeforeCAMIc
                   edu3code_1vs2c*feedbackcorrectc
                   edu3code_0vs1and2c*feedbackcorrectc;
RUN;
TITLE3 'Include effect of edu3 contrasts, drop NS interactions';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc
                   edu3code_1vs2c edu3code_0vs1and2c
                   edu3code_1vs2c*feedbackcorrectc;
RUN;
QUIT;
ODS HTML CLOSE;

/* GLM.CAMIonIATorder.validFeedback.CAMItime.htm: models of camimean that
   include timecamic60, built up term by term, for feedbackcorrect from -3
   through 3. */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.validFeedback.CAMItime.htm";
TITLE 'Stigma3 consentedus18' ;
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';

TITLE3 'Test effect of time spent on CAMI';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = timecamic60;
RUN;

TITLE3 'Include BIAT order condition';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = timecamic60 IATbeforeCAMIc;
RUN;

TITLE3 'Include effect of feedback';
/* Overwrites WORK.PRED with this model's fitted values and influence statistics. */
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = timecamic60 IATbeforeCAMIc feedbackcorrectc;
  OUTPUT OUT=PRED P=YHAT RSTUDENT=resid COOKD=cookd H=lever DFFITS=dffit;
RUN;

TITLE3 'Include effects of feedback and interaction';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = timecamic60 IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc;
RUN;

TITLE3 'All interactions';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = timecamic60|IATbeforeCAMIc|feedbackcorrectc;
RUN;
ODS HTML CLOSE;


/* GLM.CAMIonIATorder.validFeedback.DROP5fromplot.htm: prints camimean
   distributions per feedbackcorrect x IATbeforeCAMI cell, removes the high
   camimean cases by cell-specific cutoffs, and refits the order / feedback
   models on the trimmed data. */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.validFeedback.DROP5fromplot.htm";
PROC SORT DATA = Clean.consentedus18;
  BY feedbackcorrect IATbeforeCAMI;
RUN;
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
TITLE3 'Identify Outliers Observed in Boxplots by Feedback and Condition';
PROC UNIVARIATE DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  BY feedbackcorrect IATbeforeCAMI;
  VAR camimean;
RUN;

/* WORK.nooutliers: clean data minus the rows above each cell's cutoff.
   The source also listed camimean > 3.8 for the feedbackcorrect = 2,
   IATbeforeCAMI = 1 cell; every such row already exceeds 3.1, so the single
   3.1 cutoff deletes exactly the same rows. */
DATA nooutliers;
  SET Clean.consentedus18;
  IF    (feedbackcorrect = -3 AND IATbeforeCAMI IN (0, 1) AND camimean > 3.2)
     OR (feedbackcorrect =  0 AND IATbeforeCAMI = 0       AND camimean > 3.3)
     OR (feedbackcorrect =  2 AND IATbeforeCAMI = 1       AND camimean > 3.1)
  THEN DELETE;
RUN;

TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
TITLE3 'DROP 5 visually-observed outliers';
PROC GLM DATA = nooutliers;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = IATbeforeCAMIc;
RUN;
TITLE3 'Include effect of feedback';
PROC GLM DATA = nooutliers;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc;
RUN;
TITLE3 'Include effects of feedback and interaction';
PROC GLM DATA = nooutliers;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc;
RUN;
ODS HTML CLOSE;

/* GLM.CAMIonIATorder.validFeedback.DROP6camiHIGH.htm: the three
   order / feedback models refit on rows with feedbackcorrect from -3
   through 3 and camimean below 3.4. */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.validFeedback.DROP6camiHIGH.htm";
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors") AND camimean < 3.4';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND camimean < 3.4;
  MODEL camimean = IATbeforeCAMIc;
RUN;
TITLE3 'Include effect of feedback AND camimean < 3.4';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND camimean < 3.4;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc;
RUN;
TITLE3 'Include effects of feedback and interaction';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND camimean < 3.4;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc;
RUN;
ODS HTML CLOSE;

/* GLM.CAMIonIATorder.BIATnotDQ.htm: order / feedback / BIAT models using the
   "cq"-suffixed predictors. No WHERE statement is applied in this section.
   NOTE(review): TITLE2 describes a restricted sample; presumably the cq
   variables are missing for disqualified participants, so those rows drop
   out of the models - confirm where the cq variables are created. */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.BIATnotDQ.htm";
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'WHERE usual BIAT cleaning yielded valid score';
PROC GLM DATA = Clean.consentedus18;
  MODEL camimean = IATbeforeCAMIcq;
RUN;

TITLE3 'Include effect of feedback';
PROC GLM DATA = Clean.consentedus18;
  MODEL camimean = IATbeforeCAMIcq feedbackcorrectcq;
RUN;

TITLE3 'Include effects of feedback and interaction';
PROC GLM DATA = Clean.consentedus18;
  MODEL camimean = IATbeforeCAMIcq feedbackcorrectcq IATbeforeCAMIcq*feedbackcorrectcq;
RUN;

TITLE3 'Include effect of BIAT';
PROC GLM DATA = Clean.consentedus18;
  MODEL camimean = IATbeforeCAMIcq BIATcq;
RUN;

TITLE3 'Include effect of BIAT and interaction';
PROC GLM DATA = Clean.consentedus18;
  MODEL camimean = IATbeforeCAMIcq BIATcq IATbeforeCAMIcq*BIATcq;
RUN;
ODS HTML CLOSE;

/**************************************************************************************/
/* DOES IMC MODERATE EFFECT?  NO.  */
/**************************************************************************************/
/* GLM.CAMIonIATorderIMC.validFeedback.htm: IMC as predictor and moderator of
   camimean. Every model requires a non-missing biat2 and feedbackcorrect
   from -3 through 3; the last two split the sample on IMCcorrect. */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorderIMC.validFeedback.htm";
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'WHERE biat2 NE . AND feedback was valid (i.e., not "too many errors")';

/* IMC alone, IMC plus order condition, and their interaction */
PROC GLM DATA = Clean.consentedus18;
  WHERE biat2 NE . AND (feedbackcorrect BETWEEN -3 AND 3);
  MODEL camimean = IMCcorrectc;
RUN;
PROC GLM DATA = Clean.consentedus18;
  WHERE biat2 NE . AND (feedbackcorrect BETWEEN -3 AND 3);
  MODEL camimean = IATbeforeCAMIc IMCcorrectc;
RUN;
PROC GLM DATA = Clean.consentedus18;
  WHERE biat2 NE . AND (feedbackcorrect BETWEEN -3 AND 3);
  MODEL camimean = IATbeforeCAMIc|IMCcorrectc;
RUN;

TITLE3 'Include effect of feedback';
PROC GLM DATA = Clean.consentedus18;
  WHERE biat2 NE . AND (feedbackcorrect BETWEEN -3 AND 3);
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc IMCcorrectc;
RUN;

/* Same order-by-feedback model within each IMCcorrect group */
TITLE3 'WHERE biat2 NE . AND IMC was FAILED';
PROC GLM DATA = Clean.consentedus18;
  WHERE biat2 NE . AND (feedbackcorrect BETWEEN -3 AND 3) AND IMCcorrect = 0;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc;
RUN;
TITLE3 'WHERE biat2 NE . AND IMC was PASSED';
PROC GLM DATA = Clean.consentedus18;
  WHERE biat2 NE . AND (feedbackcorrect BETWEEN -3 AND 3) AND IMCcorrect = 1;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc;
RUN;
ODS HTML CLOSE;

/* GLM.BIATonIATorderIMC.validFeedback.htm: BIAT modeled on imccorrect, then
   with IATbeforeCAMIc added, then with their interaction; feedbackcorrect
   from -3 through 3. */
ODS HTML PATH=WebOut FILE="GLM.BIATonIATorderIMC.validFeedback.htm";
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL BIAT = imccorrect;
RUN;
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL BIAT = IATbeforeCAMIc imccorrect;
RUN;
PROC GLM DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  MODEL BIAT = IATbeforeCAMIc|imccorrect;
RUN;
ODS HTML CLOSE;

/* Three reports that repeat the order model and the order-by-feedback model
   of camimean (feedbackcorrect from -3 through 3) under different
   restrictions on camitotalrts. */

/* ---- camitotalrts from 46 through 155 ---- */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.validFeedback.camiRTmid90.htm";
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND (camitotalrts BETWEEN 46 AND 155);
  MODEL camimean = IATbeforeCAMIc;
RUN;
TITLE3 'Include effect of feedback';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND (camitotalrts BETWEEN 46 AND 155);
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc;
RUN;
ODS HTML CLOSE;

/* ---- camitotalrts at least 45.76 ---- */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.validFeedback.camiRTtop95.htm";
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND camitotalrts GE 45.76;
  MODEL camimean = IATbeforeCAMIc;
RUN;
TITLE3 'Include effect of feedback';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND camitotalrts GE 45.76;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc;
RUN;
ODS HTML CLOSE;

/* ---- camitotalrts at least 30 ---- */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.validFeedback.camiRTgt30.htm";
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND camitotalrts GE 30;
  MODEL camimean = IATbeforeCAMIc;
RUN;
TITLE3 'Include effect of feedback';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND camitotalrts GE 30;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc;
RUN;
ODS HTML CLOSE;


/* Two reports that repeat the order model and the order-by-feedback model of
   camimean for rows with feedbackcorrect from -3 through 3 AND BIATdq = 0;
   the second report additionally keeps camitotalrts from 45 through 156. */

/* ---- feedbackcorrect in range and BIATdq = 0 ---- */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.validFeedbackNOdq.htm";
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND BIATdq = 0;
  MODEL camimean = IATbeforeCAMIc;
RUN;
TITLE3 'Include effect of feedback';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND BIATdq = 0;
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc;
RUN;
ODS HTML CLOSE;

/* ---- same, plus camitotalrts from 45 through 156 ---- */
ODS HTML PATH=WebOut FILE="GLM.CAMIonIATorder.validFeedbackNOdq.camiRTmid90.htm";
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'WHERE feedback was valid (i.e., not "too many errors")';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND BIATdq = 0
    AND (camitotalrts BETWEEN 45 AND 156);
  MODEL camimean = IATbeforeCAMIc;
RUN;
TITLE3 'Include effect of feedback';
PROC GLM DATA = Clean.consentedus18;
  WHERE (feedbackcorrect BETWEEN -3 AND 3) AND BIATdq = 0
    AND (camitotalrts BETWEEN 45 AND 156);
  MODEL camimean = IATbeforeCAMIc feedbackcorrectc IATbeforeCAMIc*feedbackcorrectc;
RUN;
ODS HTML CLOSE;

/* boxplot cami by BIAT feedback and BIAT order */
/* Build the boxplot input in a single pass over the clean data (the source
   version re-read the full data set fourteen times, once per cell).

   Each kept row gets camiBYordBYscore, the x-axis group code for its
   feedbackcorrect x IATbeforeCAMI cell:
       code = 3*(feedbackcorrect + 3) + IATbeforeCAMI + 1
   which yields 1,2 for feedbackcorrect = -3; 4,5 for -2; ... ; 19,20 for 3,
   i.e. the same codes as before, with every third value left unused.
   Rows outside IATbeforeCAMI in (0,1) or feedbackcorrect in -3..3 are dropped.

   Plot1-Plot14 are still created, one per cell, with the same two variables,
   in case later code reads them. */
DATA Plot1 Plot2 Plot3 Plot4 Plot5 Plot6 Plot7
     Plot8 Plot9 Plot10 Plot11 Plot12 Plot13 Plot14;
  SET Clean.consentedus18;
  IF IATbeforeCAMI IN (0, 1) AND feedbackcorrect IN (-3, -2, -1, 0, 1, 2, 3);
  camiBYordBYscore = 3*(feedbackcorrect + 3) + IATbeforeCAMI + 1;
  KEEP camimean camiBYordBYscore;
  /* route the row to the data set for its cell */
  SELECT (camiBYordBYscore);
    WHEN (1)  OUTPUT Plot1;
    WHEN (2)  OUTPUT Plot2;
    WHEN (4)  OUTPUT Plot3;
    WHEN (5)  OUTPUT Plot4;
    WHEN (7)  OUTPUT Plot5;
    WHEN (8)  OUTPUT Plot6;
    WHEN (10) OUTPUT Plot7;
    WHEN (11) OUTPUT Plot8;
    WHEN (13) OUTPUT Plot9;
    WHEN (14) OUTPUT Plot10;
    WHEN (16) OUTPUT Plot11;
    WHEN (17) OUTPUT Plot12;
    WHEN (19) OUTPUT Plot13;
    WHEN (20) OUTPUT Plot14;
    OTHERWISE;   /* unreachable given the subsetting IF; avoids a SELECT error */
  END;
RUN;

/* Stack the fourteen cells and order them by group code for PROC BOXPLOT. */
DATA plotcami;
  SET Plot1 Plot2 Plot3 Plot4 Plot5
      Plot6 Plot7 Plot8 Plot9 Plot10
      Plot11 Plot12 Plot13 Plot14;
RUN;
PROC SORT DATA = plotcami;
  BY camiBYordBYscore;
RUN;

GOPTIONS RESET=PATTERN FTEXT=zapfbu HTITLE=2 HTEXT=2.5; RUN;
PROC BOXPLOT DATA=plotcami;  TITLE ' ';
 AXIS1 OFFSET = (5 PCT, 5 PCT) LABEL=NONE MINOR=NONE  order=(1 to 5 by 1);
 AXIS2 OFFSET = (0 PCT, 0 PCT) LABEL=NONE MINOR=NONE ;
      PLOT (camimean)*camiBYordBYscore/NOTCHES BOXSTYLE=SCHEMATIC VREF=2.1 BOXWIDTHSCALE=1 
                                 CBOXES=BLACK IDSYMBOL=CIRCLE IDCOLOR=BLACK
                                  VAXIS=AXIS1 HAXIS=AXIS2 ; RUN; 
* SETS median cami score (2.1) for those with valid BIAT feedback as horizontal reference ;


/* INCLUDE INVALID BIAT FEEDBACK PEOPLE: boxplot of camimean by BIAT feedback
   level and task order.
   Group code camiBYordBYscore (horizontal axis):
     feedbackcorrect  -3   -2   -1    0     1     2     3    >3
     IATbeforeCAMI=0   1    4    7   10    13    16    19    22
     IATbeforeCAMI=1   2    5    8   11    14    17    20    23
   Built in a single pass over the data set instead of one DATA step per group. */
DATA plotcami;
 SET Clean.consentedus18 (KEEP = camimean IATbeforeCAMI feedbackcorrect);
 IF IATbeforeCAMI IN (0, 1);
 IF feedbackcorrect IN (-3, -2, -1, 0, 1, 2, 3) THEN
    camiBYordBYscore = 3*(feedbackcorrect + 3) + IATbeforeCAMI + 1;
 /* every feedbackcorrect value above 3 shares one pair of codes (22/23) */
 ELSE IF feedbackcorrect > 3 THEN
    camiBYordBYscore = 22 + IATbeforeCAMI;
 /* missing, non-integer, or below -3: not plotted */
 ELSE DELETE;
 KEEP camimean camiBYordBYscore;
RUN;
/* PROC BOXPLOT expects the input grouped by the group variable */
PROC SORT DATA = plotcami; BY camiBYordBYscore; RUN;

GOPTIONS RESET=PATTERN FTEXT=zapfbu HTITLE=2 HTEXT=2.5;
TITLE ' ';
AXIS1 OFFSET = (5 PCT, 5 PCT) LABEL=NONE MINOR=NONE  order=(1 to 5 by 1);
AXIS2 OFFSET = (0 PCT, 0 PCT) LABEL=NONE MINOR=NONE ;
/* VREF=2.1 sets the median cami score (2.1) for those with valid BIAT feedback
   as the horizontal reference line */
PROC BOXPLOT DATA=plotcami;
      PLOT (camimean)*camiBYordBYscore/NOTCHES BOXSTYLE=SCHEMATIC VREF=2.1 BOXWIDTHSCALE=1
                                 CBOXES=BLACK IDSYMBOL=CIRCLE IDCOLOR=BLACK
                                  VAXIS=AXIS1 HAXIS=AXIS2 ;
RUN;

/* EXCLUDE NON-U.S. AND <18: boxplot of camimean by BIAT feedback level and
   task order, keeping only rows with age GE 18 and countrycit = 'us' and a
   feedbackcorrect value of -3..3.
   Group code camiBYordBYscore (horizontal axis):
     feedbackcorrect  -3   -2   -1    0     1     2     3
     IATbeforeCAMI=0   1    4    7   10    13    16    19
     IATbeforeCAMI=1   2    5    8   11    14    17    20
   Built in a single pass over the data set instead of one DATA step per group. */
DATA plotcami;
 SET Clean.consentedus18 (KEEP = camimean IATbeforeCAMI feedbackcorrect age countrycit);
 IF IATbeforeCAMI IN (0, 1)
    AND feedbackcorrect IN (-3, -2, -1, 0, 1, 2, 3)
    AND age GE 18
    AND countrycit IN ('us');
 camiBYordBYscore = 3*(feedbackcorrect + 3) + IATbeforeCAMI + 1;
 KEEP camimean camiBYordBYscore;
RUN;
/* PROC BOXPLOT expects the input grouped by the group variable */
PROC SORT DATA = plotcami; BY camiBYordBYscore; RUN;

GOPTIONS RESET=PATTERN FTEXT=zapfbu HTITLE=2 HTEXT=2.5;
TITLE ' ';
AXIS1 OFFSET = (5 PCT, 5 PCT) LABEL=NONE MINOR=NONE  order=(1 to 5 by 1);
AXIS2 OFFSET = (0 PCT, 0 PCT) LABEL=NONE MINOR=NONE ;
/* VREF=2.1 sets the median cami score (2.1) for those with valid BIAT feedback
   as the horizontal reference line */
PROC BOXPLOT DATA=plotcami;
      PLOT (camimean)*camiBYordBYscore/NOTCHES BOXSTYLE=SCHEMATIC VREF=2.1 BOXWIDTHSCALE=1
                                 CBOXES=BLACK IDSYMBOL=CIRCLE IDCOLOR=BLACK
                                  VAXIS=AXIS1 HAXIS=AXIS2 ;
RUN;

/* Sort the analysis data set in place by task order, then inspect omdimc2:
   frequencies for rows with tasksseen GE 3, frequencies for all rows, and a
   listing of omdimc2 alongside omdimc2rt. No new variable is created here. */
PROC SORT DATA = Clean.consentedus18;
  BY IATbeforeCAMI;
RUN;

PROC FREQ DATA = Clean.consentedus18;
  WHERE tasksseen GE 3;
  TABLES omdimc2;
RUN;

PROC FREQ DATA = Clean.consentedus18;
  TABLES omdimc2;
RUN;

PROC PRINT DATA = Clean.consentedus18;
  VAR omdimc2 omdimc2rt;
RUN;

/* Percent histogram of omdimc2rt for rows with imccorrect = 0,
   bars centred at 2500, 7500, ... */
GOPTIONS RESET=PATTERN FTEXT=DUPLEX HTITLE=2 HTEXT=1;
PATTERN COLOR=BLACK VALUE=R3;
TITLE  'Time (ms) spent on IMC when incorrect response made';
TITLE2 '5-second increments, mid-points 2500ms, 7500ms, etc.';
AXIS1 OFFSET=(2 PCT, 2 PCT) LABEL=NONE;   /* insets the plot from the axes */
AXIS2 ORDER=(0 TO 40 BY 10) LABEL=(ANGLE=90 'Percent');
PROC GCHART DATA = Clean.consentedus18;
  WHERE imccorrect = 0;
  VBAR omdimc2rt / TYPE      = PERCENT
                   MIDPOINTS = 2500 TO 290000 BY 5000
                   MAXIS     = AXIS1
                   RAXIS     = AXIS2;
RUN; QUIT;

/* Percent histogram of omdimc2rts for rows with imccorrect = 0,
   bars centred at 2.5, 7.5, ..., 57.5 */
TITLE  'Seconds spent on IMC when incorrect response made';
TITLE2 '5-second increments, mid-points 2.5s, 7.5s, etc.';
AXIS1 OFFSET=(2 PCT, 2 PCT) LABEL=NONE;   /* insets the plot from the axes */
AXIS2 ORDER=(0 TO 40 BY 10) LABEL=(ANGLE=90 'Percent');
PROC GCHART DATA = Clean.consentedus18;
  WHERE imccorrect = 0;
  VBAR omdimc2rts / TYPE      = PERCENT
                    MIDPOINTS = 2.5 TO 57.5 BY 5
                    MAXIS     = AXIS1
                    RAXIS     = AXIS2;
RUN; QUIT;

/* Percent histogram of omdimc2rts for rows with imccorrect = 0 and
   omdimc2rts LE 38, one bar per unit (centres .5, 1.5, ..., 37.5) */
TITLE  'Seconds spent on IMC when incorrect response made';
TITLE2 '(excluding the 5% taking > 38 seconds)';
TITLE3 '1-second increments';
AXIS1 OFFSET=(2 PCT, 2 PCT) LABEL=NONE;   /* insets the plot from the axes */
AXIS2 ORDER=(0 TO 15 BY 5) LABEL=(ANGLE=90 'Percent');
PROC GCHART DATA = Clean.consentedus18;
  WHERE imccorrect = 0 AND omdimc2rts LE 38;
  VBAR omdimc2rts / TYPE      = PERCENT
                    MIDPOINTS = .5 TO 37.5 BY 1
                    MAXIS     = AXIS1
                    RAXIS     = AXIS2;
RUN; QUIT;

/* Percent histogram of timeimc for rows with imccorrect = 0,
   bars centred at 2.5, 7.5, ..., 117.5 */
TITLE  'Seconds spent on IMC when WRONG response made';
TITLE2 '5-second increments, mid-points 2.5s, 7.5s, etc.';
AXIS1 OFFSET=(2 PCT, 2 PCT) LABEL=NONE;   /* insets the plot from the axes */
AXIS2 ORDER=(0 TO 40 BY 10) LABEL=(ANGLE=90 'Percent');
PROC GCHART DATA = Clean.consentedus18;
  WHERE imccorrect = 0;
  VBAR timeimc / TYPE      = PERCENT
                 MIDPOINTS = 2.5 TO 117.5 BY 5
                 MAXIS     = AXIS1
                 RAXIS     = AXIS2;
RUN; QUIT;

/* Percent histogram of timeimc for rows with imccorrect = 1,
   bars centred at 2.5, 7.5, ..., 117.5 */
TITLE  'Seconds spent on IMC when CORRECT response made';
TITLE2 '5-second increments, mid-points 2.5s, 7.5s, etc.';
AXIS1 OFFSET=(2 PCT, 2 PCT) LABEL=NONE;   /* insets the plot from the axes */
AXIS2 ORDER=(0 TO 40 BY 10) LABEL=(ANGLE=90 'Percent');
PROC GCHART DATA = Clean.consentedus18;
  WHERE imccorrect = 1;
  VBAR timeimc / TYPE      = PERCENT
                 MIDPOINTS = 2.5 TO 117.5 BY 5
                 MAXIS     = AXIS1
                 RAXIS     = AXIS2;
RUN; QUIT;


/* Distribution of timetotal. The first run goes to whatever destinations are
   already open (under the titles then in effect); the next two are written
   to DESC.TIMEtotal.htm, overall and for rows with BIATandCAMI = 1. */
PROC UNIVARIATE DATA = Clean.consentedus18 PLOT;
  VAR timetotal;
RUN;

ODS HTML PATH=WebOut FILE="DESC.TIMEtotal.htm";
TITLE  'Stigma3 DATA = consentedus18';
TITLE2 'Time in seconds from START consent to START debrief';

TITLE3 'Overall';
PROC UNIVARIATE DATA = Clean.consentedus18 PLOT;
  VAR timetotal;
RUN;

TITLE3 'When BIAT and CAMI valid';
PROC UNIVARIATE DATA = Clean.consentedus18 PLOT;
  WHERE BIATandCAMI=1;
  VAR timetotal;
RUN;
ODS HTML CLOSE;

/* Ad-hoc CAMI timing checks for rows with camivalid = 1: distribution of
   timecami, then frequency tables of camitotalrts and timecami. Output goes
   to whatever destinations are currently open. */
PROC UNIVARIATE DATA = Clean.consentedus18 PLOT;
  WHERE camivalid = 1;
  VAR timecami;
RUN;
PROC FREQ DATA = Clean.consentedus18;
  WHERE camivalid = 1;
  TABLES camitotalrts;
RUN;
PROC FREQ DATA = Clean.consentedus18;
  WHERE camivalid = 1;
  TABLES timecami;
RUN;
/* No ODS HTML CLOSE here: these steps never open an HTML destination, and
   the DESC.TIMEtotal.htm destination just before them is already closed. */

/* Distribution of timeIAT for rows with feedbackinvalid = 0,
   written to DESC.BIATtime.htm */
ODS HTML PATH=WebOut FILE="DESC.BIATtime.htm";
TITLE  'Stigma3: consentedus18';
TITLE2 'Time in seconds WHERE BIAT valid';
PROC UNIVARIATE DATA = Clean.consentedus18 PLOT;
  WHERE feedbackinvalid = 0;
  VAR timeIAT;
RUN;
ODS HTML CLOSE;

/* CAMI timing report (DESC.CAMItime.htm), rows with camivalid = 1 throughout:
     1. correlations among camimean, camitotalrts and timecami
     2. camimean regressed on / correlated with camitotalrts, separately for
        IATbeforeCAMI = 0 and IATbeforeCAMI = 1
     3. overall distributions and frequency tables of camitotalrts, timecami */
ODS HTML PATH=WebOut FILE="DESC.CAMItime.htm";
TITLE 'Stigma3 DATA = consentedus18';
TITLE2 'Time in seconds WHERE camivalid = 1';
PROC CORR DATA = clean.consentedus18; VAR camimean camitotalrts timecami;
 WHERE camivalid = 1; RUN;

TITLE2 'Speed relation with score when CAMI first';
PROC GLM DATA = clean.consentedus18;
 MODEL camimean = camitotalrts; WHERE camivalid = 1 AND IATbeforeCAMI = 0; RUN;
 PROC CORR DATA = clean.consentedus18;
 VAR camimean camitotalrts; WHERE camivalid = 1 AND IATbeforeCAMI = 0; RUN;

TITLE2 'Speed relation with score when BIAT first';
PROC GLM DATA = clean.consentedus18;
 MODEL camimean = camitotalrts; WHERE camivalid = 1 AND IATbeforeCAMI = 1; RUN;
 PROC CORR DATA = clean.consentedus18;
 VAR camimean camitotalrts; WHERE camivalid = 1 AND IATbeforeCAMI = 1; RUN;

/* Titles persist until replaced. Restore the overall TITLE2 so the steps
   below, which are not restricted to BIAT-first rows, are not labelled
   'when BIAT first'. */
TITLE2 'Time in seconds WHERE camivalid = 1';
PROC UNIVARIATE DATA = clean.consentedus18 PLOT; VAR camitotalrts;
 WHERE camivalid = 1; RUN;
PROC UNIVARIATE DATA = clean.consentedus18 PLOT; VAR timecami;
 WHERE camivalid = 1; RUN;
PROC FREQ DATA = clean.consentedus18; TABLES camitotalrts; WHERE camivalid = 1; RUN;
PROC FREQ DATA = clean.consentedus18; TABLES timecami; WHERE camivalid = 1; RUN;
ODS HTML CLOSE;


/* Percent histogram of timecami for rows with camivalid = 1 and
   timecami LE 300. The title now says "Seconds": the 300 cut-off is described
   as 5 minutes and the bars as 5-second increments, so the plotted unit is
   seconds, not minutes.
   NOTE(review): midpoints stop at 257.5 although rows up to 300 are kept --
   confirm how GCHART bins those rows, or extend the range to 297.5. */
GOPTIONS RESET=PATTERN FTEXT=DUPLEX HTITLE=2 HTEXT=2;
  PATTERN COLOR=BLACK VALUE=R3;
PROC GCHART DATA = clean.consentedus18 ;
 TITLE 'Seconds spent on CAMI when all 10 items answered';
  TITLE2 '(excluding the 1% taking > 5 minutes)';
 TITLE3 '5-second increments, midpoints 17.5, 22.5, etc.';
  AXIS1 OFFSET = (2 PCT, 2 PCT) label= NONE;  /* insets the plot from the axes */
  AXIS2 ORDER=(0 TO 15 BY 5) label= (ANGLE=90 'Percent');
   VBAR timecami /MIDPOINTS= 17.5 to 257.5 BY 5
                   MAXIS = AXIS1
                   RAXIS = AXIS2
                   TYPE = PERCENT ; WHERE camivalid = 1 AND timecami LE 300; RUN; QUIT;

/* Same timecami histogram, one chart per value of iatbeforecami (the data set
   is sorted first because BY-group processing needs it). The title now says
   "Seconds": the 300 cut-off is described as 5 minutes and the bars as
   5-second increments, so the plotted unit is seconds, not minutes.
   NOTE(review): midpoints stop at 257.5 although rows up to 300 are kept --
   confirm how GCHART bins those rows, or extend the range to 297.5. */
PROC SORT DATA = clean.consentedus18 ; BY iatbeforecami; RUN;
GOPTIONS RESET=PATTERN FTEXT=DUPLEX HTITLE=2 HTEXT=2;
  PATTERN COLOR=BLACK VALUE=R3;
PROC GCHART DATA = clean.consentedus18 ;
 TITLE 'Seconds spent on CAMI when all 10 items answered';
  TITLE2 '(excluding the 1% taking > 5 minutes)';
 TITLE3 '5-second increments, midpoints 17.5, 22.5, etc.';
  AXIS1 OFFSET = (2 PCT, 2 PCT) label= NONE;  /* insets the plot from the axes */
  AXIS2 ORDER=(0 TO 15 BY 5) label= (ANGLE=90 'Percent');
   VBAR timecami /MIDPOINTS= 17.5 to 257.5 BY 5
                   MAXIS = AXIS1
                   RAXIS = AXIS2
                   TYPE = PERCENT ; WHERE camivalid = 1 AND timecami LE 300; BY iatbeforecami; RUN; QUIT;

/* timecami histogram per value of iatbeforecami, with bars centred at
   15, 20, ..., 300 so the whole kept range (timecami LE 300) has its own bars.
   Title fixes: the unit is seconds (300 = the "5 minutes" cut-off), and
   TITLE3 now states the midpoints this chart actually uses.
   Relies on clean.consentedus18 already being sorted by iatbeforecami. */
GOPTIONS RESET=PATTERN FTEXT=DUPLEX HTITLE=2 HTEXT=2;
  PATTERN COLOR=BLACK VALUE=R3;
PROC GCHART DATA = clean.consentedus18 ;
 TITLE 'Seconds spent on CAMI when all 10 items answered';
  TITLE2 '(excluding the 1% taking > 5 minutes)';
 TITLE3 '5-second increments, midpoints 15, 20, etc.';
  AXIS1 OFFSET = (2 PCT, 2 PCT) label= NONE;  /* insets the plot from the axes */
  AXIS2 ORDER=(0 TO 15 BY 5) label= (ANGLE=90 'Percent');
   VBAR timecami /MIDPOINTS= 15 to 300 BY 5
                   MAXIS = AXIS1
                   RAXIS = AXIS2
                   TYPE = PERCENT ; WHERE camivalid = 1 AND timecami LE 300; BY iatbeforecami; RUN; QUIT;

/* Percent histogram of camitotalrts for rows with camivalid = 1 and
   camitotalrts LE 155, bars centred at 17.5, 22.5, ... */
GOPTIONS RESET=PATTERN FTEXT=DUPLEX HTITLE=2 HTEXT=1;
PATTERN COLOR=BLACK VALUE=R3;
TITLE  'Seconds spent on CAMI when all 10 items answered';
TITLE2 '(excluding the 5% taking > 155 seconds)';
TITLE3 '5-second increments, midpoints 17.5, 22.5, etc.';
AXIS1 OFFSET=(2 PCT, 2 PCT) LABEL=NONE;   /* insets the plot from the axes */
AXIS2 ORDER=(0 TO 15 BY 5) LABEL=(ANGLE=90 'Percent');
PROC GCHART DATA = Clean.consentedus18;
  WHERE camivalid = 1 AND camitotalrts LE 155;
  VBAR camitotalrts / TYPE      = PERCENT
                      MIDPOINTS = 17.5 TO 153.5 BY 5
                      MAXIS     = AXIS1
                      RAXIS     = AXIS2;
RUN; QUIT;

/* imcfailed as predictor of (a) biatdq and (b) feedbackinvalid: a cross-tab
   with chi-square followed by a logistic regression for each outcome.
   DESCENDING reverses the response ordering so the higher outcome value is
   the modelled event. HTML output: LOGIT.BIATdqONimcinvalid.htm */
ODS HTML PATH=WebOut FILE="LOGIT.BIATdqONimcinvalid.htm";
TITLE  'Stigma3 DATA = consentedus18';

TITLE2 'Effect of IMC failure on BIAT disqualification';
PROC FREQ DATA = Clean.consentedus18;
  TABLES imcfailed*biatdq / CHISQ NOROW NOPERCENT;
RUN;
PROC LOGISTIC DATA = Clean.consentedus18 DESCENDING;
  MODEL biatdq = imcfailed / CLPARM=WALD ALPHA=.05 CLODDS=PL RSQUARE;
RUN; QUIT;

TITLE2 'Effect of IMC failure invalid BIAT feedback';
PROC FREQ DATA = Clean.consentedus18;
  TABLES imcfailed*feedbackinvalid / CHISQ NOROW NOPERCENT;
RUN;
PROC LOGISTIC DATA = Clean.consentedus18 DESCENDING;
  MODEL feedbackinvalid = imcfailed / CLPARM=WALD ALPHA=.05 CLODDS=PL RSQUARE;
RUN; QUIT;
ODS HTML CLOSE;

/* Correlations among timecami, imccorrect, IATbeforeCAMI, camimean and the
   three BIAT scores: all rows, then rows with feedbackcorrect in [-3, 3].
   HTML output: CORR.timecamiANDbias.htm */
ODS HTML PATH=WebOut FILE="CORR.timecamiANDbias.htm";
TITLE  'Stigma3 DATA = consentedus18';

TITLE2 'Overall';
PROC CORR DATA = Clean.consentedus18;
  VAR timecami imccorrect IATbeforeCAMI camimean BIAT1 BIAT2 BIAT;
RUN;

TITLE2 'Valid BIAT feedback';
PROC CORR DATA = Clean.consentedus18;
  WHERE feedbackcorrect BETWEEN -3 AND 3;
  VAR timecami imccorrect IATbeforeCAMI camimean BIAT1 BIAT2 BIAT;
RUN;
ODS HTML CLOSE;

/* Frequency of BIATandCAMI: for all rows with a non-missing IATbeforeCAMI,
   then separately for IATbeforeCAMI = 1 and IATbeforeCAMI = 0.
   HTML output: DESC.validBIATandCAMI.byIATbeforeCAMI.htm */
ODS HTML PATH=WebOut FILE="DESC.validBIATandCAMI.byIATbeforeCAMI.htm";
TITLE  'Stigma3: Proportion of valid scores for both BIAT and CAMI';

TITLE2 'Overall';
PROC FREQ DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI NE .;
  TABLES BIATandCAMI;
RUN;

TITLE2 'WHERE IATbeforeCAMI = 1 (i.e., those assigned to BIATfirst condition)';
PROC FREQ DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI = 1;
  TABLES BIATandCAMI;
RUN;

TITLE2 'WHERE IATbeforeCAMI = 0 (i.e., those assigned to CAMIfirst condition)';
PROC FREQ DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI = 0;
  TABLES BIATandCAMI;
RUN;
ODS HTML CLOSE;


/* BIAT-first rows (IATbeforeCAMI = 1): camivalid frequencies, a cross-tab of
   camivalid by feedbackcorrect, and logistic regressions of camivalid on
   feedbackinvalid, feedbackcorrect, biat1, biat2 and biat.
   PROC LOGISTIC is run without DESCENDING here, so the lower ordered value of
   camivalid is the modelled event.
   HTML output: LOGIT.dropoutCAMI.ONfeedbackANDbiat.htm */
ODS HTML PATH=WebOut FILE="LOGIT.dropoutCAMI.ONfeedbackANDbiat.htm";
TITLE 'Stigma3 FOR Condition=BIAT-FIRST';
TITLE2 'WHERE IATbeforeCAMI = 1 (i.e., those assigned to BIATfirst condition)';
PROC FREQ DATA = Clean.consentedus18; TABLES camivalid; WHERE IATbeforeCAMI = 1; RUN;
TITLE2 'WHERE IATbeforeCAMI = 1 AND feedbackvalid = 1';
PROC FREQ DATA = Clean.consentedus18; TABLES camivalid; WHERE IATbeforeCAMI = 1 AND feedbackvalid = 1; RUN;
/* The cross-tab below is NOT restricted to feedbackvalid = 1, so it gets its
   own TITLE2 instead of inheriting the one above. */
TITLE2 'camivalid BY feedbackcorrect WHERE IATbeforeCAMI = 1';
PROC FREQ DATA = Clean.consentedus18; TABLES camivalid*feedbackcorrect/CHISQ NOROW NOPERCENT; WHERE IATbeforeCAMI = 1; RUN;
TITLE2 'Effect of Feedback invalid vs. valid';
PROC LOGISTIC DATA = Clean.consentedus18 ;
    MODEL camivalid = feedbackinvalid
                    /
      CLPARM=WALD ALPHA=.05 CLODDS=PL RSQUARE;
  WHERE IATbeforeCAMI = 1; RUN; QUIT;
TITLE2 'Effect of Feedback WHERE feedbackcorrect GE -3 AND feedbackcorrect LE 3 ';
PROC LOGISTIC DATA = Clean.consentedus18 ;
    MODEL camivalid = feedbackcorrect
                    /
      CLPARM=WALD ALPHA=.05 CLODDS=PL RSQUARE;
  WHERE IATbeforeCAMI = 1 AND feedbackcorrect GE -3 AND feedbackcorrect LE 3 ; RUN; QUIT;
/* The next three models share this TITLE2 and the same row filter. */
TITLE2 'Effect of Actual BIAT scores WHERE feedbackcorrect GE -3 AND feedbackcorrect LE 3 ';
PROC LOGISTIC DATA = Clean.consentedus18 ;
    MODEL camivalid = biat1
                    /
      CLPARM=WALD ALPHA=.05 CLODDS=PL RSQUARE;
  WHERE IATbeforeCAMI = 1 AND feedbackcorrect GE -3 AND feedbackcorrect LE 3; RUN; QUIT;
PROC LOGISTIC DATA = Clean.consentedus18 ;
    MODEL camivalid = biat2
                    /
      CLPARM=WALD ALPHA=.05 CLODDS=PL RSQUARE;
  WHERE IATbeforeCAMI = 1 AND feedbackcorrect GE -3 AND feedbackcorrect LE 3; RUN; QUIT;
PROC LOGISTIC DATA = Clean.consentedus18 ;
    MODEL camivalid = biat
                    /
      CLPARM=WALD ALPHA=.05 CLODDS=PL RSQUARE;
  WHERE IATbeforeCAMI = 1 AND feedbackcorrect GE -3 AND feedbackcorrect LE 3; RUN; QUIT;
ODS HTML CLOSE;

/* Ad-hoc checks by number of tasks seen; output goes to whatever
   destinations are open, under the titles currently in effect. */

/* IATbeforeCAMI = 0: camivalid at tasksseen 4 and 5 */
PROC FREQ DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI = 0 AND tasksseen = 4;
  TABLES camivalid;
RUN;
PROC FREQ DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI = 0 AND tasksseen = 5;
  TABLES camivalid;
RUN;

/* IATbeforeCAMI = 1: feedbackvalid at tasksseen 4, 5 and 6, plus BIAT at 5 */
PROC FREQ DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI = 1 AND tasksseen = 4;
  TABLES feedbackvalid;
RUN;
PROC FREQ DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI = 1 AND tasksseen = 5;
  TABLES feedbackvalid;
RUN;
PROC FREQ DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI = 1 AND tasksseen = 5;
  TABLES BIAT;
RUN;
PROC FREQ DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI = 1 AND tasksseen = 6;
  TABLES feedbackvalid;
RUN;


/* CAMI-first rows (IATbeforeCAMI = 0): feedbackvalid frequencies, overall and
   for camivalid = 1, then a logistic regression of feedbackvalid on camimean.
   PROC LOGISTIC is run without DESCENDING, so the lower ordered value of
   feedbackvalid is the modelled event.
   HTML output: LOGIT.dropoutBIAT.ONcami.htm */
ODS HTML PATH=WebOut FILE="LOGIT.dropoutBIAT.ONcami.htm";
TITLE  'Stigma3 FOR Condition=CAMI-FIRST';

TITLE2 'WHERE IATbeforeCAMI = 0';
PROC FREQ DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI = 0;
  TABLES feedbackvalid;
RUN;

TITLE2 'WHERE IATbeforeCAMI = 0 AND camivalid = 1';
PROC FREQ DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI = 0 AND camivalid = 1;
  TABLES feedbackvalid;
RUN;

TITLE2 'Effect of CAMIscore on persisting and receiving valid BIAT feedback';
PROC LOGISTIC DATA = Clean.consentedus18;
  WHERE IATbeforeCAMI = 0;
  MODEL feedbackvalid = camimean / CLPARM=WALD ALPHA=.05 CLODDS=PL RSQUARE;
RUN; QUIT;
ODS HTML CLOSE;

/* Variable listing of the full analysis data set, written to
   COMPLETE.CONTENTS.htm */
ODS HTML PATH=WebOut FILE="COMPLETE.CONTENTS.htm";
TITLE 'Stigma3 DATA = consentedus18';
PROC CONTENTS DATA = Clean.consentedus18;
RUN;
ODS HTML CLOSE;

/* Copy of the analysis data set for public sharing, with zipcode dropped,
   followed by a PROC CONTENTS listing written to CONTENTS.Stigma1.htm.
   NOTE(review): the data set and HTML file are named "stigma1" although the
   other titles in this script say Stigma3 -- confirm the intended name
   before sharing (left unchanged here so existing references keep working). */
DATA clean.stigma1public; SET CLean.consentedus18;
 DROP zipcode;
RUN;  /* explicit step boundary: do not rely on the later PROC to end this step */
ODS HTML PATH=WebOut FILE="CONTENTS.Stigma1.htm";
TITLE 'DATA = stigma1public';
proc contents data=CLean.stigma1public;run;
ODS HTML CLOSE;

/* Frequency table of session_date, written to DESC.session_date.htm */
ODS HTML PATH=WebOut FILE="DESC.session_date.htm";
TITLE 'Stigma3 DATA = consentedus18';
PROC FREQ DATA = Clean.consentedus18;
  TABLES session_date;
RUN;
ODS HTML CLOSE;

